From 446ee446d9ae66f36e95c3c90bbcc4e56b94cde0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 10 Apr 2026 09:20:04 +0200 Subject: [PATCH 001/554] xen/privcmd: fix double free via VMA splitting commit 24daca4fc07f3ff8cd0e3f629cd982187f48436a upstream. privcmd_vm_ops defines .close (privcmd_close), but neither .may_split nor .open. When userspace does a partial munmap() on a privcmd mapping, the kernel splits the VMA via __split_vma(). Since may_split is NULL, the split is allowed. vm_area_dup() copies vm_private_data (a pages array allocated in alloc_empty_pages()) into the new VMA without any fixup, because there is no .open callback. Both VMAs now point to the same pages array. When the unmapped portion is closed, privcmd_close() calls: - xen_unmap_domain_gfn_range() - xen_free_unpopulated_pages() - kvfree(pages) The surviving VMA still holds the dangling pointer. When it is later destroyed, the same sequence runs again, which leads to a double free. Fix this issue by adding a .may_split callback denying the VMA split. This is XSA-487 / CVE-2026-31787 Fixes: d71f513985c2 ("xen: privcmd: support autotranslated physmap guests.") Reported-by: Atharva Vartak Suggested-by: Atharva Vartak Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman --- drivers/xen/privcmd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index cbc62f0df11b7..f37d8d212c066 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1619,6 +1619,12 @@ static void privcmd_close(struct vm_area_struct *vma) kvfree(pages); } +static int privcmd_may_split(struct vm_area_struct *area, unsigned long addr) +{ + /* Forbid splitting, avoids double free via privcmd_close(). 
*/ + return -EINVAL; +} + static vm_fault_t privcmd_fault(struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -1630,6 +1636,7 @@ static vm_fault_t privcmd_fault(struct vm_fault *vmf) static const struct vm_operations_struct privcmd_vm_ops = { .close = privcmd_close, + .may_split = privcmd_may_split, .fault = privcmd_fault }; From d5f59216650c51e5e3fcb7517c825bc8047f60ef Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 27 Mar 2026 14:13:38 +0100 Subject: [PATCH 002/554] Buffer overflow in drivers/xen/sys-hypervisor.c commit 27fdbab4221b375de54bf91919798d88520c6e28 upstream. The build id returned by HYPERVISOR_xen_version(XENVER_build_id) is neither NUL terminated nor a string. The first causes a buffer overflow as sprintf in buildid_show will read and copy till it finds a NUL. 00000000 f4 91 51 f4 dd 38 9e 9d 65 47 52 eb 10 71 db 50 |..Q..8..eGR..q.P| 00000010 b9 a8 01 42 6f 2e 32 |...Bo.2| 00000017 So use a memcpy instead of sprintf to have the correct value: 00000000 f4 91 51 f4 dd 00 9e 9d 65 47 52 eb 10 71 db 50 |..Q.....eGR..q.P| 00000010 b9 a8 01 42 |...B| 00000014 (the above have a hack to embed a zero inside and check it's returned correctly). 
This is XSA-485 / CVE-2026-31786 Fixes: 84b7625728ea ("xen: add sysfs node for hypervisor build id") Signed-off-by: Frediano Ziglio Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/sys-hypervisor.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 2f880374b4632..c1a0ca1b1b5fa 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -366,6 +366,8 @@ static ssize_t buildid_show(struct hyp_sysfs_attr *attr, char *buffer) ret = sprintf(buffer, ""); return ret; } + if (ret > PAGE_SIZE) + return -ENOSPC; buildid = kmalloc(sizeof(*buildid) + ret, GFP_KERNEL); if (!buildid) @@ -373,8 +375,10 @@ static ssize_t buildid_show(struct hyp_sysfs_attr *attr, char *buffer) buildid->len = ret; ret = HYPERVISOR_xen_version(XENVER_build_id, buildid); - if (ret > 0) - ret = sprintf(buffer, "%s", buildid->buf); + if (ret > 0) { + /* Build id is binary, not a string. 
*/ + memcpy(buffer, buildid->buf, ret); + } kfree(buildid); return ret; From 1fe06068166d4fc16722201f267b1fe19efad639 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 30 Apr 2026 11:13:53 +0200 Subject: [PATCH 003/554] Linux 6.18.26 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c8343ec96a09a..f1b9b5849b79b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 25 +SUBLEVEL = 26 EXTRAVERSION = NAME = Baby Opossum Posse From 4d7893a137eadb6163ea4298bf67d74b811d76ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Wed, 15 Apr 2026 12:04:53 -0300 Subject: [PATCH 004/554] ALSA: usb-audio: stop parsing UAC2 rates at MAX_NR_RATES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3c318f97dcc50b2e0556a1813bd6958678e881fd upstream. parse_uac2_sample_rate_range() caps the number of enumerated rates at MAX_NR_RATES, but it only breaks out of the current rate loop. A malformed UAC2 RANGE response with additional triplets continues parsing the remaining triplets and repeatedly prints "invalid uac2 rates" while probe still holds register_mutex. Stop the whole parse once the cap is reached and return the number of rates collected so far. 
Fixes: 4fa0e81b8350 ("ALSA: usb-audio: fix possible hang and overflow in parse_uac2_sample_rate_range()") Cc: stable@vger.kernel.org Reported-by: syzbot+d56178c27a4710960820@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d56178c27a4710960820 Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260415-usb-audio-uac2-rate-cap-v1-1-5ecbafc120d8@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 53b5dc5453b78..133595d79d927 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -463,7 +463,7 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, nr_rates++; if (nr_rates >= MAX_NR_RATES) { usb_audio_err(chip, "invalid uac2 rates\n"); - break; + return nr_rates; } skip_rate: From 9bbbbbd404d30df74076010a60e165b56e3e7f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Tue, 21 Apr 2026 21:53:52 -0300 Subject: [PATCH 005/554] ALSA: usb-audio: Avoid false E-MU sample-rate notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fca9c850042a7ab4828ce3a9caa8bc40ea09856a upstream. snd_emuusb_set_samplerate() unconditionally notifies the E-MU SampleRate Extension Unit control after issuing SET_CUR. If snd_usb_mixer_set_ctl_value() fails, the control value has not changed, yet snd_usb_mixer_notify_id() still invalidates the cache and emits a value-change event to userspace. Notify the control only after a successful write. 
Fixes: 7d2b451e65d2 ("ALSA: usb-audio - Added functionality for E-mu 0404USB/0202USB/TrackerPre") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260421-alsa-emuusb-samplerate-notify-v1-1-8b63bbc1d7f1@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 4873b5e748016..58f6a33f0ba92 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1538,15 +1538,17 @@ void snd_emuusb_set_samplerate(struct snd_usb_audio *chip, { struct usb_mixer_interface *mixer; struct usb_mixer_elem_info *cval; + int err; int unitid = 12; /* SampleRate ExtensionUnit ID */ list_for_each_entry(mixer, &chip->mixer_list, list) { if (mixer->id_elems[unitid]) { cval = mixer_elem_list_to_info(mixer->id_elems[unitid]); - snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, - cval->control << 8, - samplerate_id); - snd_usb_mixer_notify_id(mixer, unitid); + err = snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, + cval->control << 8, + samplerate_id); + if (!err) + snd_usb_mixer_notify_id(mixer, unitid); break; } } From f28a8281792b0ec47e85aabe36543369e0823de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Tue, 21 Apr 2026 22:07:41 -0300 Subject: [PATCH 006/554] ALSA: usb-audio: Fix Audio Advantage Micro II SPDIF switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a9224f26b754b5034719248891ff3c2ea0d11144 upstream. snd_microii_spdif_switch_put() returns 0 when the requested vendor register value differs from the cached one. This comparison was inverted by the resume-support conversion, so real SPDIF switch toggles are ignored while no-op writes still issue SET_CUR and report success. Return early only when the requested value matches the cached one. 
Fixes: 288673beae6c ("ALSA: usb-audio: Add resume support for MicroII SPDIF ctls") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260421-microii-spdif-switch-fix-v1-1-5c50dc28b88f@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 58f6a33f0ba92..7126a2cf9e79b 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2027,7 +2027,7 @@ static int snd_microii_spdif_switch_put(struct snd_kcontrol *kcontrol, int err; reg = ucontrol->value.integer.value[0] ? 0x28 : 0x2a; - if (reg != list->kctl->private_value) + if (reg == list->kctl->private_value) return 0; kcontrol->private_value = reg; From 49340680d41fd540c1e67b98cf4d5310c7d6a35d Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 2 Apr 2026 16:13:42 +0300 Subject: [PATCH 007/554] usb: xhci: Make usb_host_endpoint.hcpriv survive endpoint_disable() commit 25e531b422dc2ac90cdae3b6e74b5cdeb081440d upstream. xHCI hardware maintains its endpoint state between add_endpoint() and drop_endpoint() calls followed by successful check_bandwidth(). So does the driver. Core may call endpoint_disable() during xHCI endpoint life, so don't clear host_ep->hcpriv then, because this breaks endpoint_reset(). If a driver calls usb_set_interface(), submits URBs which make host sequence state non-zero and calls usb_clear_halt(), the device clears its sequence state but xhci_endpoint_reset() bails out. The next URB malfunctions: USB2 loses one packet, USB3 gets Transaction Error or may not complete at all on some (buggy?) HCs from ASMedia and AMD. This is triggered by uvcvideo on bulk video devices. The code was copied from ehci_endpoint_disable() but it isn't needed here - hcpriv should only be NULL on emulated root hub endpoints. 
It might prevent resetting and inadvertently enabling a disabled and dropped endpoint, but core shouldn't try to reset dropped endpoints. Document xhci requirements regarding hcpriv. They are currently met. Fixes: 18b74067ac78 ("xhci: Fix use-after-free regression in xhci clear hub TT implementation") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://patch.msgid.link/20260402131342.2628648-26-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 - include/linux/usb.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index d84dded5333fb..a8dee373a485e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3278,7 +3278,6 @@ static void xhci_endpoint_disable(struct usb_hcd *hcd, xhci_dbg(xhci, "endpoint disable with ep_state 0x%x\n", ep->ep_state); done: - host_ep->hcpriv = NULL; spin_unlock_irqrestore(&xhci->lock, flags); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 2511f1e5b114d..375ee6e202dbe 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -55,7 +55,8 @@ struct ep_device; * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) - * with one or more transfer descriptors (TDs) per urb + * with one or more transfer descriptors (TDs) per urb; must be preserved + * by core while BW is allocated for the endpoint * @ep_dev: ep_device for sysfs info * @extra: descriptors following this endpoint in the configuration * @extralen: how many bytes of "extra" are valid From 6f480aa64d4db89639b5e0cd38ffd6d013b3d76b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 2 Apr 2026 15:14:57 +0800 Subject: [PATCH 008/554] usb: chipidea: otg: not wait vbus drop if use role_switch commit a4e99587102a83ee911c670752fbca694c7e557f 
upstream. The usb role switch will update ID and VBUS states at the same time, and vbus will not drop when execute data role swap in Type-C usecase. So lets not wait vbus drop in usb role switch case too. Fixes: e1b5d2bed67c ("usb: chipidea: core: handle usb role switch in a common way") Cc: stable@vger.kernel.org Acked-by: Peter Chen Reviewed-by: Jun Li Signed-off-by: Xu Yang Link: https://patch.msgid.link/20260402071457.2516021-3-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 647e98f4e3511..2371789effa05 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -187,8 +187,8 @@ void ci_handle_id_switch(struct ci_hdrc *ci) ci_role_stop(ci); - if (role == CI_ROLE_GADGET && - IS_ERR(ci->platdata->vbus_extcon.edev)) + if (role == CI_ROLE_GADGET && !ci->role_switch && + IS_ERR(ci->platdata->vbus_extcon.edev)) /* * Wait vbus lower than OTGSC_BSV before connecting * to host. If connecting status is from an external From b216de91c0c048b2b451923889a3f0c8cd3416bb Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 2 Apr 2026 15:14:56 +0800 Subject: [PATCH 009/554] usb: chipidea: core: allow ci_irq_handler() handle both ID and VBUS change commit b94b631d9f78e653855f7fb58dbcb86c2a856f6f upstream. For USB role switch-triggered IRQ, ID and VBUS change come together, for example when switching from host to device mode. ID indicate a role switch and VBUS is required to determine whether the device controller can start operating. Currently, ci_irq_handler() handles only a single event per invocation. This can cause an issue where switching to device mode results in the device controller not working at all. Allowing ci_irq_handler() to handle both ID and VBUS change in one call resolves this issue. Meanwhile, this change also affects the VBUS event handling logic. 
Previously, if an ID event indicated host mode the VBUS IRQ will be ignored as the device disable BSE when stop() is called. With the new behavior, if ID and VBUS IRQ occur together and the target mode is host, the VBUS event is queued and ci_handle_vbus_change() will call usb_gadget_vbus_connect(), after which USBMODE is switched to device mode, causing host mode to stop working. To prevent this, an additional check is added to skip handling VBUS event when current role is not device mode. Suggested-by: Peter Chen Fixes: e1b5d2bed67c ("usb: chipidea: core: handle usb role switch in a common way") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Link: https://patch.msgid.link/20260402071457.2516021-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 45 +++++++++++++++++++------------------ drivers/usb/chipidea/otg.c | 3 +++ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 694b4a8e4e1d8..6df442435af6c 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -543,30 +543,31 @@ static irqreturn_t ci_irq_handler(int irq, void *data) if (ret == IRQ_HANDLED) return ret; } - } - /* - * Handle id change interrupt, it indicates device/host function - * switch. - */ - if (ci->is_otg && (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS)) { - ci->id_event = true; - /* Clear ID change irq status */ - hw_write_otgsc(ci, OTGSC_IDIS, OTGSC_IDIS); - ci_otg_queue_work(ci); - return IRQ_HANDLED; - } + /* + * Handle id change interrupt, it indicates device/host function + * switch. + */ + if ((otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS)) { + ci->id_event = true; + /* Clear ID change irq status */ + hw_write_otgsc(ci, OTGSC_IDIS, OTGSC_IDIS); + } - /* - * Handle vbus change interrupt, it indicates device connection - * and disconnection events. 
- */ - if (ci->is_otg && (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS)) { - ci->b_sess_valid_event = true; - /* Clear BSV irq */ - hw_write_otgsc(ci, OTGSC_BSVIS, OTGSC_BSVIS); - ci_otg_queue_work(ci); - return IRQ_HANDLED; + /* + * Handle vbus change interrupt, it indicates device connection + * and disconnection events. + */ + if ((otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS)) { + ci->b_sess_valid_event = true; + /* Clear BSV irq */ + hw_write_otgsc(ci, OTGSC_BSVIS, OTGSC_BSVIS); + } + + if (ci->id_event || ci->b_sess_valid_event) { + ci_otg_queue_work(ci); + return IRQ_HANDLED; + } } /* Handle device/host interrupt */ diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 2371789effa05..fecc7d7e2f0d7 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -130,6 +130,9 @@ enum ci_role ci_otg_role(struct ci_hdrc *ci) void ci_handle_vbus_change(struct ci_hdrc *ci) { + if (ci->role != CI_ROLE_GADGET) + return; + if (!ci->is_otg) { if (ci->platdata->flags & CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS) usb_gadget_vbus_connect(&ci->gadget); From c578b48b9fdf4d048ee083e1eb6720de426001fc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Apr 2026 16:32:19 +0200 Subject: [PATCH 010/554] ALSA: usb-audio: Evaluate packsize caps at the right place commit 52521e8398839105ef8eb22b3f0993f9b0d11a57 upstream. We introduced the upper bound checks of the packet sizes by the ep->maxframesize for avoiding the URB submission errors. However, the check was applied at an incorrect place in the function snd_usb_endpoint_set_params() where ep->maxframesize isn't defined yet; the value is defined at a bit later position. So this ended up with a failure at the first run while the second run works. For fixing it, move the check at the correct place, right after the calculation of ep->maxframesize in the same function. 
Fixes: 7fe8dec3f628 ("ALSA: usb-audio: Cap the packet size pre-calculations") Link: https://bugzilla.kernel.org/show_bug.cgi?id=221292 Cc: Link: https://patch.msgid.link/20260410143220.1676344-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 77c4330d52953..15490a87b460e 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1375,9 +1375,6 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, return -EINVAL; } - ep->packsize[0] = min(ep->packsize[0], ep->maxframesize); - ep->packsize[1] = min(ep->packsize[1], ep->maxframesize); - /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; ep->freqshift = INT_MIN; @@ -1404,6 +1401,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ep->maxframesize = ep->maxpacksize / ep->cur_frame_bytes; ep->curframesize = ep->curpacksize / ep->cur_frame_bytes; + ep->packsize[0] = min(ep->packsize[0], ep->maxframesize); + ep->packsize[1] = min(ep->packsize[1], ep->maxframesize); + err = update_clock_ref_rate(chip, ep); if (err >= 0) { ep->need_setup = false; From 85cbf7fb568af5358aae61925c4e66b8f5e1439d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: [PATCH 011/554] LoongArch: Add spectre boundary for syscall dispatch table commit 0c965d2784fbbd7f8e3b96d875c9cfdf7c00da3d upstream. The LoongArch syscall number is directly controlled by userspace, but does not have an array_index_nospec() boundary to prevent access past the syscall function pointer tables.
Cc: stable@vger.kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 168bd97540f8c..d0257935078eb 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,7 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs) add_random_kstack_offset(); if (nr < NR_syscalls) { - syscall_fn = sys_call_table[nr]; + syscall_fn = sys_call_table[array_index_nospec(nr, NR_syscalls)]; regs->regs[4] = syscall_fn(regs->orig_a0, regs->regs[5], regs->regs[6], regs->regs[7], regs->regs[8], regs->regs[9]); } From 332884f5eb79dd60a7162b079d09d39208567a31 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 21:16:09 +0200 Subject: [PATCH 012/554] drm/nouveau: fix u32 overflow in pushbuf reloc bounds check commit 2fc87d37be1b730a149b035f9375fdb8cc5333a5 upstream. nouveau_gem_pushbuf_reloc_apply() validates each relocation with if (r->reloc_bo_offset + 4 > nvbo->bo.base.size) but reloc_bo_offset is __u32 (uapi/drm/nouveau_drm.h) and the integer literal 4 promotes to unsigned int, so the addition is performed in 32 bits and wraps before the comparison against the size_t bo size. Cast to u64 so the addition happens in 64-bit arithmetic. Cc: Lyude Paul Cc: Danilo Krummrich Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Reported-by: Anthropic Cc: stable Assisted-by: gkh_clanker_t1000 Fixes: a1606a9596e5 ("drm/nouveau: new gem pushbuf interface, bump to 0.0.16") Signed-off-by: Greg Kroah-Hartman [ Add Fixes: tag. 
- Danilo ] Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 690e10fbf0bd6..f6cfb6ea4885d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -686,7 +686,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, } nvbo = (void *)(unsigned long)bo[r->reloc_bo_index].user_priv; - if (unlikely(r->reloc_bo_offset + 4 > + if (unlikely((u64)r->reloc_bo_offset + 4 > nvbo->bo.base.size)) { NV_PRINTK(err, cli, "reloc outside of bo\n"); ret = -EINVAL; From 438e357b3cc6cd6900df271e4bc567bfe1142281 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 19 Feb 2026 15:34:35 +0100 Subject: [PATCH 013/554] leds: qcom-lpg: Check for array overflow when selecting the high resolution commit d45963a93c1495e9f1338fde91d0ebba8fd22474 upstream. When selecting the high resolution values from the array, FIELD_GET() is used to pull from a 3 bit register, yet the array being indexed has only 5 values in it. Odds are the hardware is sane, but just to be safe, properly check before just overflowing and reading random data and then setting up chip values based on that. 
Cc: stable Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026021934-nearby-playroom-036b@gregkh Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/leds/rgb/leds-qcom-lpg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index a460782dadca4..ce0e61452af27 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -1273,7 +1273,12 @@ static int lpg_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, return ret; if (chan->subtype == LPG_SUBTYPE_HI_RES_PWM) { - refclk = lpg_clk_rates_hi_res[FIELD_GET(PWM_CLK_SELECT_HI_RES_MASK, val)]; + unsigned int clk_idx = FIELD_GET(PWM_CLK_SELECT_HI_RES_MASK, val); + + if (clk_idx >= ARRAY_SIZE(lpg_clk_rates_hi_res)) + return -EINVAL; + + refclk = lpg_clk_rates_hi_res[clk_idx]; resolution = lpg_pwm_resolution_hi_res[FIELD_GET(PWM_SIZE_HI_RES_MASK, val)]; } else { refclk = lpg_clk_rates[FIELD_GET(PWM_CLK_SELECT_MASK, val)]; From fb91d4e49fcbea0b5091394ac5b8f7d4124265c3 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 2 Apr 2026 13:40:16 +0800 Subject: [PATCH 014/554] greybus: gb-beagleplay: bound bootloader receive buffering commit 1214bf28965ceaf584fb20d357731264dd2e10e1 upstream. cc1352_bootloader_rx() appends each serdev chunk into the fixed rx_buffer before parsing bootloader packets. The helper can keep leftover bytes between callbacks and may receive multiple packets in one callback, so a single count value is not constrained by one packet length. Check that the incoming chunk fits in the remaining receive buffer space before memcpy(). If it does not, drop the staged data and consume the bytes instead of overflowing rx_buffer. 
Fixes: 0cf7befa3ea2 ("greybus: gb-beagleplay: Add firmware upload API") Cc: stable Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260402054016.38587-1-pengpeng@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/gb-beagleplay.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index 87186f891a6ac..e70787146c4fa 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -535,6 +535,13 @@ static size_t cc1352_bootloader_rx(struct gb_beagleplay *bg, const u8 *data, int ret; size_t off = 0; + if (count > sizeof(bg->rx_buffer) - bg->rx_buffer_len) { + dev_warn(&bg->sd->dev, + "dropping oversized bootloader receive chunk"); + bg->rx_buffer_len = 0; + return count; + } + memcpy(bg->rx_buffer + bg->rx_buffer_len, data, count); bg->rx_buffer_len += count; From b2801647c203a38e013802e9e9616b5bfac64968 Mon Sep 17 00:00:00 2001 From: Weigang He Date: Mon, 30 Mar 2026 12:08:00 +0000 Subject: [PATCH 015/554] greybus: gb-beagleplay: fix sleep in atomic context in hdlc_tx_frames() commit 6b526dca0966f2370835765019a54319b78fca8d upstream. hdlc_append() calls usleep_range() to wait for circular buffer space, but it is called with tx_producer_lock (a spinlock) held via hdlc_tx_frames() -> hdlc_append_tx_frame()/hdlc_append_tx_u8()/etc. Sleeping while holding a spinlock is illegal and can trigger "BUG: scheduling while atomic". Fix this by moving the buffer-space wait out of hdlc_append() and into hdlc_tx_frames(), before the spinlock is acquired. The new flow: 1. Pre-calculate the worst-case encoded frame length. 2. Wait (with sleep) outside the lock until enough space is available, kicking the TX consumer work to drain the buffer. 3. Acquire the spinlock, re-verify space, and write the entire frame atomically. This ensures that sleeping only happens without any lock held, and that frames are either fully enqueued or not written at all. 
This bug is found by CodeQL static analysis tool (interprocedural sleep-in-atomic query) and my code review. Fixes: ec558bbfea67 ("greybus: Add BeaglePlay Linux Driver") Cc: stable Cc: Ayush Singh Cc: Johan Hovold Cc: Alex Elder Cc: Greg Kroah-Hartman Signed-off-by: Weigang He Link: https://patch.msgid.link/20260330120801.981506-1-geoffreyhe2@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/gb-beagleplay.c | 105 +++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 16 deletions(-) diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index e70787146c4fa..e28d1e9ec9573 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -242,30 +242,26 @@ static void hdlc_write(struct gb_beagleplay *bg) } /** - * hdlc_append() - Queue HDLC data for sending. + * hdlc_append() - Queue a single HDLC byte for sending. * @bg: beagleplay greybus driver * @value: hdlc byte to transmit * - * Assumes that producer lock as been acquired. + * Caller must hold tx_producer_lock and must have ensured sufficient + * space in the circular buffer before calling (see hdlc_tx_frames()). */ static void hdlc_append(struct gb_beagleplay *bg, u8 value) { - int tail, head = bg->tx_circ_buf.head; + int head = bg->tx_circ_buf.head; + int tail = READ_ONCE(bg->tx_circ_buf.tail); - while (true) { - tail = READ_ONCE(bg->tx_circ_buf.tail); - - if (CIRC_SPACE(head, tail, TX_CIRC_BUF_SIZE) >= 1) { - bg->tx_circ_buf.buf[head] = value; + lockdep_assert_held(&bg->tx_producer_lock); + if (WARN_ON_ONCE(CIRC_SPACE(head, tail, TX_CIRC_BUF_SIZE) < 1)) + return; - /* Finish producing HDLC byte */ - smp_store_release(&bg->tx_circ_buf.head, - (head + 1) & (TX_CIRC_BUF_SIZE - 1)); - return; - } - dev_warn(&bg->sd->dev, "Tx circ buf full"); - usleep_range(3000, 5000); - } + bg->tx_circ_buf.buf[head] = value; + /* Ensure buffer write is visible before advancing head. 
*/ + smp_store_release(&bg->tx_circ_buf.head, + (head + 1) & (TX_CIRC_BUF_SIZE - 1)); } static void hdlc_append_escaped(struct gb_beagleplay *bg, u8 value) @@ -313,13 +309,90 @@ static void hdlc_transmit(struct work_struct *work) spin_unlock_bh(&bg->tx_consumer_lock); } +/** + * hdlc_encoded_length() - Calculate worst-case encoded length of an HDLC frame. + * @payloads: array of payload buffers + * @count: number of payloads + * + * Returns the maximum number of bytes needed in the circular buffer. + */ +static size_t hdlc_encoded_length(const struct hdlc_payload payloads[], + size_t count) +{ + size_t i, payload_len = 0; + + for (i = 0; i < count; i++) + payload_len += payloads[i].len; + + /* + * Worst case: every data byte needs escaping (doubles in size). + * data bytes = address(1) + control(1) + payload + crc(2) + * framing = opening flag(1) + closing flag(1) + */ + return 2 + (1 + 1 + payload_len + 2) * 2; +} + +#define HDLC_TX_BUF_WAIT_RETRIES 500 +#define HDLC_TX_BUF_WAIT_US_MIN 3000 +#define HDLC_TX_BUF_WAIT_US_MAX 5000 + +/** + * hdlc_tx_frames() - Encode and queue an HDLC frame for transmission. + * @bg: beagleplay greybus driver + * @address: HDLC address field + * @control: HDLC control field + * @payloads: array of payload buffers + * @count: number of payloads + * + * Sleeps outside the spinlock until enough circular-buffer space is + * available, then verifies space under the lock and writes the entire + * frame atomically. Either a complete frame is enqueued or nothing is + * written, avoiding both sleeping in atomic context and partial frames. + */ static void hdlc_tx_frames(struct gb_beagleplay *bg, u8 address, u8 control, const struct hdlc_payload payloads[], size_t count) { + size_t needed = hdlc_encoded_length(payloads, count); + int retries = HDLC_TX_BUF_WAIT_RETRIES; size_t i; + int head, tail; + + /* Wait outside the lock for sufficient buffer space. */ + while (retries--) { + /* Pairs with smp_store_release() in hdlc_append(). 
*/ + head = smp_load_acquire(&bg->tx_circ_buf.head); + tail = READ_ONCE(bg->tx_circ_buf.tail); + + if (CIRC_SPACE(head, tail, TX_CIRC_BUF_SIZE) >= needed) + break; + + /* Kick the consumer and sleep — no lock held. */ + schedule_work(&bg->tx_work); + usleep_range(HDLC_TX_BUF_WAIT_US_MIN, HDLC_TX_BUF_WAIT_US_MAX); + } + + if (retries < 0) { + dev_warn_ratelimited(&bg->sd->dev, + "Tx circ buf full, dropping frame\n"); + return; + } spin_lock(&bg->tx_producer_lock); + /* + * Re-check under the lock. Should not fail since + * tx_producer_lock serialises all producers and the + * consumer only frees space, but guard against it. + */ + head = bg->tx_circ_buf.head; + tail = READ_ONCE(bg->tx_circ_buf.tail); + if (unlikely(CIRC_SPACE(head, tail, TX_CIRC_BUF_SIZE) < needed)) { + spin_unlock(&bg->tx_producer_lock); + dev_warn_ratelimited(&bg->sd->dev, + "Tx circ buf space lost, dropping frame\n"); + return; + } + hdlc_append_tx_frame(bg); hdlc_append_tx_u8(bg, address); hdlc_append_tx_u8(bg, control); From 22a16d3eafee92a165c756081587c95850127107 Mon Sep 17 00:00:00 2001 From: Tyllis Xu Date: Sun, 8 Mar 2026 00:21:08 -0600 Subject: [PATCH 016/554] misc: ibmasm: fix OOB MMIO read in ibmasm_handle_mouse_interrupt() commit 4b6e6ead556734bdc14024c5f837132b1e7a4b84 upstream. ibmasm_handle_mouse_interrupt() performs an out-of-bounds MMIO read when the queue reader or writer index from hardware exceeds REMOTE_QUEUE_SIZE (60). A compromised service processor can trigger this by writing an out-of-range value to the reader or writer MMIO register before asserting an interrupt. Since writer is re-read from hardware on every loop iteration, it can also be set to an out-of-range value after the loop has already started. The root cause is that get_queue_reader() and get_queue_writer() return raw readl() values that are passed directly into get_queue_entry(), which computes: queue_begin + reader * sizeof(struct remote_input) with no bounds check. 
This unchecked MMIO address is then passed to memcpy_fromio(), reading 8 bytes from unintended device registers. For sufficiently large values the address falls outside the PCI BAR mapping entirely, triggering a machine check exception. Fix by checking both indices against REMOTE_QUEUE_SIZE at the top of the loop body, before any call to get_queue_entry(). On an out-of-range value, reset the reader register to 0 via set_queue_reader() before breaking, so that normal queue operation can resume if the corrupted hardware state is transient. Reported-by: Yuhao Jiang Fixes: 278d72ae8803 ("[PATCH] ibmasm driver: redesign handling of remote control events") Cc: stable@vger.kernel.org Cc: ychen@northwestern.edu Signed-off-by: Tyllis Xu Link: https://patch.msgid.link/20260308062108.258940-1-LivelyCarpet87@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmasm/remote.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c index ec816d3b38cbd..521531738c9af 100644 --- a/drivers/misc/ibmasm/remote.c +++ b/drivers/misc/ibmasm/remote.c @@ -177,6 +177,11 @@ void ibmasm_handle_mouse_interrupt(struct service_processor *sp) writer = get_queue_writer(sp); while (reader != writer) { + if (reader >= REMOTE_QUEUE_SIZE || writer >= REMOTE_QUEUE_SIZE) { + set_queue_reader(sp, 0); + break; + } + memcpy_fromio(&input, get_queue_entry(sp, reader), sizeof(struct remote_input)); From ee5737891464030a189837467df3b81a273718ad Mon Sep 17 00:00:00 2001 From: Tyllis Xu Date: Sat, 14 Mar 2026 11:53:54 -0500 Subject: [PATCH 017/554] ibmasm: fix OOB reads in command_file_write due to missing size checks commit 0eb09f737428e482a32a2e31e5e223f2b35a71d3 upstream. 
The command_file_write() handler allocates a kernel buffer of exactly count bytes and copies user data into it, but does not validate the buffer against the dot command protocol before passing it to get_dot_command_size() and get_dot_command_timeout(). Since both the allocation size (count) and the header fields (command_size, data_size) are independently user-controlled, an attacker can cause get_dot_command_size() to return a value exceeding the allocation, triggering OOB reads in get_dot_command_timeout() and an out-of-bounds memcpy_toio() that leaks kernel heap memory to the service processor. Fix with two guards: reject writes smaller than sizeof(struct dot_command_header) before allocation, then after copying user data reject commands where the buffer is smaller than the total size declared by the header (sizeof(header) + command_size + data_size). This ensures all subsequent header and payload field accesses stay within the buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Tyllis Xu Link: https://patch.msgid.link/20260314165355.548119-1-LivelyCarpet87@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmasm/ibmasmfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index b26c930e3edb4..0aecf366e61ba 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -303,6 +303,8 @@ static ssize_t command_file_write(struct file *file, const char __user *ubuff, s return -EINVAL; if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) return 0; + if (count < sizeof(struct dot_command_header)) + return -EINVAL; if (*offset != 0) return 0; @@ -319,6 +321,11 @@ static ssize_t command_file_write(struct file *file, const char __user *ubuff, s return -EFAULT; } + if (count < get_dot_command_size(cmd->buffer)) { + command_put(cmd); + return -EINVAL; + } + spin_lock_irqsave(&command_data->sp->lock, flags); 
if (command_data->command) { spin_unlock_irqrestore(&command_data->sp->lock, flags); From c1c2417c60dbdca5ebb00462f21ee71c2d7f7083 Mon Sep 17 00:00:00 2001 From: Tyllis Xu Date: Sat, 14 Mar 2026 11:58:05 -0500 Subject: [PATCH 018/554] ibmasm: fix heap over-read in ibmasm_send_i2o_message() commit 9aad71144fa3682cca3837a06c8623016790e7ec upstream. The ibmasm_send_i2o_message() function uses get_dot_command_size() to compute the byte count for memcpy_toio(), but this value is derived from user-controlled fields in the dot_command_header (command_size: u8, data_size: u16) and is never validated against the actual allocation size. A root user can write a small buffer with inflated header fields, causing memcpy_toio() to read up to ~65 KB past the end of the allocation into adjacent kernel heap, which is then forwarded to the service processor over MMIO. Silently clamping the copy size is not sufficient: if the header fields claim a larger size than the buffer, the SP receives a dot command whose own header is inconsistent with the I2O message length, which can cause the SP to desynchronize. Reject such commands outright by returning failure. Validate command_size before calling get_mfa_inbound() to avoid leaking an I2O message frame: reading INBOUND_QUEUE_PORT dequeues a hardware frame from the controller's free pool, and returning without a corresponding set_mfa_inbound() call would permanently exhaust it. Additionally, clamp command_size to I2O_COMMAND_SIZE before the memcpy_toio() so the MMIO write stays within the I2O message frame, consistent with the clamping already performed by outgoing_message_size() for the header field. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Tyllis Xu Link: https://patch.msgid.link/20260314165805.548293-1-LivelyCarpet87@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmasm/lowlevel.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c index 6922dc6c10db0..5313230f36ad4 100644 --- a/drivers/misc/ibmasm/lowlevel.c +++ b/drivers/misc/ibmasm/lowlevel.c @@ -19,17 +19,21 @@ static struct i2o_header header = I2O_HEADER_TEMPLATE; int ibmasm_send_i2o_message(struct service_processor *sp) { u32 mfa; - unsigned int command_size; + size_t command_size; struct i2o_message *message; struct command *command = sp->current_command; + command_size = get_dot_command_size(command->buffer); + if (command_size > command->buffer_size) + return 1; + if (command_size > I2O_COMMAND_SIZE) + command_size = I2O_COMMAND_SIZE; + mfa = get_mfa_inbound(sp->base_address); if (!mfa) return 1; - command_size = get_dot_command_size(command->buffer); - header.message_size = outgoing_message_size(command_size); - + header.message_size = outgoing_message_size((unsigned int)command_size); message = get_i2o_message(sp->base_address, mfa); memcpy_toio(&message->header, &header, sizeof(struct i2o_header)); From 3e8fefd2997c85e98de81f35380616c0a51430a4 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 6 Apr 2026 16:22:54 -0700 Subject: [PATCH 019/554] driver core: Don't let a device probe until it's ready commit a2225b6e834a838ae3c93709760edc0a169eb2f2 upstream. The moment we link a "struct device" into the list of devices for the bus, it's possible probe can happen. This is because another thread can load the driver at any time and that can cause the device to probe. 
This has been seen in practice with a stack crawl that looks like this [1]: really_probe() __driver_probe_device() driver_probe_device() __driver_attach() bus_for_each_dev() driver_attach() bus_add_driver() driver_register() __platform_driver_register() init_module() [some module] do_one_initcall() do_init_module() load_module() __arm64_sys_finit_module() invoke_syscall() As a result of the above, it was seen that device_links_driver_bound() could be called for the device before "dev->fwnode->dev" was assigned. This prevented __fw_devlink_pickup_dangling_consumers() from being called which meant that other devices waiting on our driver's sub-nodes were stuck deferring forever. It's believed that this problem is showing up suddenly for two reasons: 1. Android has recently (last ~1 year) implemented an optimization to the order it loads modules [2]. When devices opt-in to this faster loading, modules are loaded one-after-the-other very quickly. This is unlike how other distributions do it. The reproduction of this problem has only been seen on devices that opt-in to Android's "parallel module loading". 2. Android devices typically opt-in to fw_devlink, and the most noticeable issue is the NULL "dev->fwnode->dev" in device_links_driver_bound(). fw_devlink is somewhat new code and also not in use by all Linux devices. Even though the specific symptom where "dev->fwnode->dev" wasn't assigned could be fixed by moving that assignment higher in device_add(), other parts of device_add() (like the call to device_pm_add()) are also important to run before probe. Only moving the "dev->fwnode->dev" assignment would likely fix the current symptoms but lead to difficult-to-debug problems in the future. Fix the problem by preventing probe until device_add() has run far enough that the device is ready to probe. If somehow we end up trying to probe before we're allowed, __driver_probe_device() will return -EPROBE_DEFER which will make certain the device is noticed. 
In the race condition that was seen with Android's faster module loading, we will temporarily add the device to the deferred list and then take it off immediately when device_add() probes the device. Instead of adding another flag to the bitfields already in "struct device", add a new "flags" field and use that. This allows us to freely change the bit from different threads without worrying about corrupting nearby bits (and means threads changing other bits won't corrupt us). [1] Captured on a machine running a downstream 6.6 kernel [2] https://cs.android.com/android/platform/superproject/main/+/main:system/core/libmodprobe/libmodprobe.cpp?q=LoadModulesParallel Cc: stable@vger.kernel.org Fixes: 2023c610dc54 ("Driver core: add new device to bus's list before probing") Reviewed-by: Alan Stern Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Danilo Krummrich Acked-by: Greg Kroah-Hartman Acked-by: Marek Szyprowski Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20260406162231.v5.1.Id750b0fbcc94f23ed04b7aecabcead688d0d8c17@changeid Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 15 ++++++++++++++ drivers/base/dd.c | 20 +++++++++++++++++++ include/linux/device.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3099dbca234ab..5dbfe936a6cea 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3688,6 +3688,21 @@ int device_add(struct device *dev) fw_devlink_link_device(dev); } + /* + * The moment the device was linked into the bus's "klist_devices" in + * bus_add_device() then it's possible that probe could have been + * attempted in a different thread via userspace loading a driver + * matching the device. "ready_to_probe" being unset would have + * blocked those attempts. Now that all of the above initialization has + * happened, unblock probe. 
If probe happens through another thread + * after this point but before bus_probe_device() runs then it's fine. + * bus_probe_device() -> device_initial_probe() -> __device_attach() + * will notice (under device_lock) that the device is already bound. + */ + device_lock(dev); + dev_set_ready_to_probe(dev); + device_unlock(dev); + bus_probe_device(dev); /* diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 2996f4c667c42..2c3a610f52a74 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -846,6 +846,26 @@ static int __driver_probe_device(const struct device_driver *drv, struct device if (dev->driver) return -EBUSY; + /* + * In device_add(), the "struct device" gets linked into the subsystem's + * list of devices and broadcast to userspace (via uevent) before we're + * quite ready to probe. Those open pathways to driver probe before + * we've finished enough of device_add() to reliably support probe. + * Detect this and tell other pathways to try again later. device_add() + * itself will also try to probe immediately after setting + * "ready_to_probe". + */ + if (!dev_ready_to_probe(dev)) + return dev_err_probe(dev, -EPROBE_DEFER, "Device not ready to probe\n"); + + /* + * Set can_match = true after calling dev_ready_to_probe(), so + * driver_deferred_probe_add() won't actually add the device to the + * deferred probe list when dev_ready_to_probe() returns false. + * + * When dev_ready_to_probe() returns false, it means that device_add() + * will do another probe() attempt for us. 
+ */ dev->can_match = true; dev_dbg(dev, "bus: '%s': %s: matched device with driver %s\n", drv->bus->name, __func__, drv->name); diff --git a/include/linux/device.h b/include/linux/device.h index 8733a4edf3ccd..f5d5da4dfab1d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -477,6 +477,21 @@ struct device_physical_location { bool lid; }; +/** + * enum struct_device_flags - Flags in struct device + * + * Each flag should have a set of accessor functions created via + * __create_dev_flag_accessors() for each access. + * + * @DEV_FLAG_READY_TO_PROBE: If set then device_add() has finished enough + * initialization that probe could be called. + */ +enum struct_device_flags { + DEV_FLAG_READY_TO_PROBE = 0, + + DEV_FLAG_COUNT +}; + /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. @@ -572,6 +587,7 @@ struct device_physical_location { * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. * @dma_iommu: Device is using default IOMMU implementation for DMA and * doesn't rely on dma_ops structure. + * @flags: DEV_FLAG_XXX flags. Use atomic bitfield operations to modify. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information @@ -694,8 +710,36 @@ struct device { #ifdef CONFIG_IOMMU_DMA bool dma_iommu:1; #endif + + DECLARE_BITMAP(flags, DEV_FLAG_COUNT); }; +#define __create_dev_flag_accessors(accessor_name, flag_name) \ +static inline bool dev_##accessor_name(const struct device *dev) \ +{ \ + return test_bit(flag_name, dev->flags); \ +} \ +static inline void dev_set_##accessor_name(struct device *dev) \ +{ \ + set_bit(flag_name, dev->flags); \ +} \ +static inline void dev_clear_##accessor_name(struct device *dev) \ +{ \ + clear_bit(flag_name, dev->flags); \ +} \ +static inline void dev_assign_##accessor_name(struct device *dev, bool value) \ +{ \ + assign_bit(flag_name, dev->flags, value); \ +} \ +static inline bool dev_test_and_set_##accessor_name(struct device *dev) \ +{ \ + return test_and_set_bit(flag_name, dev->flags); \ +} + +__create_dev_flag_accessors(ready_to_probe, DEV_FLAG_READY_TO_PROBE); + +#undef __create_dev_flag_accessors + /** * struct device_link - Device link representation. * @supplier: The device on the supplier end of the link. From fa9a4c5e69aaae47df95328fa96b3f2931e3180a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 17 Mar 2026 09:01:20 -0700 Subject: [PATCH 020/554] device property: Make modifications of fwnode "flags" thread safe commit f72e77c33e4b5657af35125e75bab249256030f3 upstream. In various places in the kernel, we modify the fwnode "flags" member by doing either: fwnode->flags |= SOME_FLAG; fwnode->flags &= ~SOME_FLAG; This type of modification is not thread-safe. If two threads are both mucking with the flags at the same time then one can clobber the other. While flags are often modified while under the "fwnode_link_lock", this is not universally true. Create some accessor functions for setting, clearing, and testing the FWNODE flags and move all users to these accessor functions. New accessor functions use set_bit() and clear_bit(), which are thread-safe. 
Cc: stable@vger.kernel.org Fixes: c2c724c868c4 ("driver core: Add fw_devlink_parse_fwtree()") Reviewed-by: Andy Shevchenko Acked-by: Mark Brown Reviewed-by: Wolfram Sang Signed-off-by: Douglas Anderson Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Saravana Kannan Link: https://patch.msgid.link/20260317090112.v2.1.I0a4d03104ecd5103df3d76f66c8d21b1d15a2e38@changeid [ Fix fwnode_clear_flag() argument alignment, restore dropped blank line in fwnode_dev_initialized(), and remove unnecessary parentheses around fwnode_test_flag() calls. - Danilo ] Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 24 ++++++++-------- drivers/bus/imx-weim.c | 2 +- drivers/i2c/i2c-core-of.c | 2 +- drivers/net/phy/mdio_bus_provider.c | 4 +-- drivers/of/base.c | 2 +- drivers/of/dynamic.c | 2 +- drivers/of/platform.c | 2 +- drivers/spi/spi.c | 2 +- include/linux/fwnode.h | 44 +++++++++++++++++++++-------- 9 files changed, 53 insertions(+), 31 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5dbfe936a6cea..01e14f787916a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -182,7 +182,7 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) if (fwnode->dev) return; - fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE); fwnode_links_purge_consumers(fwnode); fwnode_for_each_available_child_node(fwnode, child) @@ -228,7 +228,7 @@ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, if (fwnode->dev && fwnode->dev->bus) return; - fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE); __fwnode_links_move_consumers(fwnode, new_sup); fwnode_for_each_available_child_node(fwnode, child) @@ -1012,7 +1012,7 @@ static void device_links_missing_supplier(struct device *dev) static bool dev_is_best_effort(struct device *dev) { return (fw_devlink_best_effort && dev->can_match) || - (dev->fwnode && 
(dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); + (dev->fwnode && fwnode_test_flag(dev->fwnode, FWNODE_FLAG_BEST_EFFORT)); } static struct fwnode_handle *fwnode_links_check_suppliers( @@ -1723,11 +1723,11 @@ bool fw_devlink_is_strict(void) static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode) { - if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED) + if (fwnode_test_flag(fwnode, FWNODE_FLAG_LINKS_ADDED)) return; fwnode_call_int_op(fwnode, add_links); - fwnode->flags |= FWNODE_FLAG_LINKS_ADDED; + fwnode_set_flag(fwnode, FWNODE_FLAG_LINKS_ADDED); } static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) @@ -1885,7 +1885,7 @@ static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) struct device *dev; bool ret; - if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED)) + if (!fwnode_test_flag(fwnode, FWNODE_FLAG_INITIALIZED)) return false; dev = get_dev_from_fwnode(fwnode); @@ -2001,10 +2001,10 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, * We aren't trying to find all cycles. Just a cycle between con and * sup_handle. */ - if (sup_handle->flags & FWNODE_FLAG_VISITED) + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_VISITED)) return false; - sup_handle->flags |= FWNODE_FLAG_VISITED; + fwnode_set_flag(sup_handle, FWNODE_FLAG_VISITED); /* Termination condition. */ if (sup_handle == con_handle) { @@ -2074,7 +2074,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, } out: - sup_handle->flags &= ~FWNODE_FLAG_VISITED; + fwnode_clear_flag(sup_handle, FWNODE_FLAG_VISITED); put_device(sup_dev); put_device(con_dev); put_device(par_dev); @@ -2127,7 +2127,7 @@ static int fw_devlink_create_devlink(struct device *con, * When such a flag is set, we can't create device links where P is the * supplier of C as that would delay the probe of C. 
*/ - if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD) && fwnode_is_ancestor_of(sup_handle, con->fwnode)) return -EINVAL; @@ -2150,7 +2150,7 @@ static int fw_devlink_create_devlink(struct device *con, else flags = FW_DEVLINK_FLAGS_PERMISSIVE; - if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NOT_DEVICE)) sup_dev = fwnode_get_next_parent_dev(sup_handle); else sup_dev = get_dev_from_fwnode(sup_handle); @@ -2162,7 +2162,7 @@ static int fw_devlink_create_devlink(struct device *con, * supplier device indefinitely. */ if (sup_dev->links.status == DL_DEV_NO_DRIVER && - sup_handle->flags & FWNODE_FLAG_INITIALIZED) { + fwnode_test_flag(sup_handle, FWNODE_FLAG_INITIALIZED)) { dev_dbg(con, "Not linking %pfwf - dev might never probe\n", sup_handle); diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 83d623d97f5f2..f735e0462c55e 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -332,7 +332,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, * fw_devlink doesn't skip adding consumers to this * device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); if (!of_platform_device_create(rd->dn, NULL, &pdev->dev)) { dev_err(&pdev->dev, "Failed to create child device '%pOF'\n", diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index eb7fb202355f8..354a88d0599e3 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -180,7 +180,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. 
*/ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); client = of_i2c_register_device(adap, rd->dn); if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index a2391d4b7e5c8..fe32730be3a70 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -299,8 +299,8 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) return -EINVAL; if (bus->parent && bus->parent->of_node) - bus->parent->of_node->fwnode.flags |= - FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD; + fwnode_set_flag(&bus->parent->of_node->fwnode, + FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD); WARN(bus->state != MDIOBUS_ALLOCATED && bus->state != MDIOBUS_UNREGISTERED, diff --git a/drivers/of/base.c b/drivers/of/base.c index 2fd27ea0310c5..6620bf07b79b8 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1868,7 +1868,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) - of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT; + fwnode_set_flag(&of_stdout->fwnode, FWNODE_FLAG_BEST_EFFORT); } if (!of_aliases) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 2eaaddcb0ec4e..ab10c180c4887 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -225,7 +225,7 @@ static void __of_attach_node(struct device_node *np) np->sibling = np->parent->child; np->parent->child = np; of_node_clear_flag(np, OF_DETACHED); - np->fwnode.flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(&np->fwnode, FWNODE_FLAG_NOT_DEVICE); raw_spin_unlock_irqrestore(&devtree_lock, flags); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index a6dca3a005aac..da023bc0b5faf 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -743,7 +743,7 @@ static int of_platform_notify(struct notifier_block 
*nb, * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); /* pdev_parent may be NULL when no bus platform device */ pdev_parent = of_find_device_by_node(parent); pdev = of_platform_device_create(rd->dn, NULL, diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 87d829d2a8427..03b5d9a6d1ffd 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4801,7 +4801,7 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action, * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); spi = of_register_spi_device(ctlr, rd->dn); put_device(&ctlr->dev); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 097be89487bf5..80b38fbf2121c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -15,6 +15,7 @@ #define _LINUX_FWNODE_H_ #include +#include #include #include #include @@ -42,12 +43,12 @@ struct device; * suppliers. Only enforce ordering with suppliers that have * drivers. 
*/ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) -#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) -#define FWNODE_FLAG_BEST_EFFORT BIT(4) -#define FWNODE_FLAG_VISITED BIT(5) +#define FWNODE_FLAG_LINKS_ADDED 0 +#define FWNODE_FLAG_NOT_DEVICE 1 +#define FWNODE_FLAG_INITIALIZED 2 +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD 3 +#define FWNODE_FLAG_BEST_EFFORT 4 +#define FWNODE_FLAG_VISITED 5 struct fwnode_handle { struct fwnode_handle *secondary; @@ -57,7 +58,7 @@ struct fwnode_handle { struct device *dev; struct list_head suppliers; struct list_head consumers; - u8 flags; + unsigned long flags; }; /* @@ -212,16 +213,37 @@ static inline void fwnode_init(struct fwnode_handle *fwnode, INIT_LIST_HEAD(&fwnode->suppliers); } +static inline void fwnode_set_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + set_bit(bit, &fwnode->flags); +} + +static inline void fwnode_clear_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + clear_bit(bit, &fwnode->flags); +} + +static inline void fwnode_assign_flag(struct fwnode_handle *fwnode, + unsigned int bit, bool value) +{ + assign_bit(bit, &fwnode->flags, value); +} + +static inline bool fwnode_test_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + return test_bit(bit, &fwnode->flags); +} + static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, bool initialized) { if (IS_ERR_OR_NULL(fwnode)) return; - if (initialized) - fwnode->flags |= FWNODE_FLAG_INITIALIZED; - else - fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; + fwnode_assign_flag(fwnode, FWNODE_FLAG_INITIALIZED, initialized); } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, From 4404d7d2dda4f3cc84a8fb6ac5417a2afc3b22d6 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 11 Apr 2026 07:29:38 +0100 Subject: [PATCH 021/554] drm/nouveau: fix nvkm_device leak on aperture removal failure commit 6597ff1d8de3f583be169587efeafd8af134e138 
upstream. When aperture_remove_conflicting_pci_devices() fails during probe, the error path returns directly without unwinding the nvkm_device that was just allocated by nvkm_device_pci_new(). This leaks both the device wrapper and the pci_enable_device() reference taken inside it. Jump to the existing fail_nvkm label so nvkm_device_del() runs and balances both. The leak was introduced when the intermediate nvkm_device_del() between detection and aperture removal was dropped in favor of creating the pci device once. Fixes: c0bfe34330b5 ("drm/nouveau: create pci device once") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260411062938.22925-1-devnexen@gmail.com Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index dc469e571c0aa..b760586b1727e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -874,7 +874,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, /* Remove conflicting drivers (vesafb, efifb etc). */ ret = aperture_remove_conflicting_pci_devices(pdev, driver_pci.name); if (ret) - return ret; + goto fail_nvkm; pci_set_master(pdev); From 5447be57b311e983366fc7b733939839441646bd Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 21 Mar 2026 18:27:46 +0100 Subject: [PATCH 022/554] rust: dma: remove DMA_ATTR_NO_KERNEL_MAPPING from public attrs commit 18fb5f1f0289b8217c0c43d54d12bccc201dd640 upstream. When DMA_ATTR_NO_KERNEL_MAPPING is passed to dma_alloc_attrs(), the returned CPU address is not a pointer to the allocated memory but an opaque handle (e.g. struct page *). Coherent (or CoherentAllocation respectively) stores this value as NonNull and exposes methods that dereference it and even modify its contents. 
Remove the flag from the public attrs module such that drivers cannot pass it to Coherent (or CoherentAllocation respectively) in the first place. Instead DMA_ATTR_NO_KERNEL_MAPPING can be supported with an additional opaque type (e.g. CoherentHandle) which does not provide access to the allocated memory. Cc: stable@vger.kernel.org Fixes: ad2907b4e308 ("rust: add dma coherent allocator abstraction") Signed-off-by: Danilo Krummrich Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260321172749.592387-1-dakr@kernel.org Signed-off-by: Alexandre Courbot Signed-off-by: Greg Kroah-Hartman --- rust/kernel/dma.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 777ea5c2431c7..4ce9e5d7f3dd0 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -232,9 +232,6 @@ pub mod attrs { /// Specifies that writes to the mapping may be buffered to improve performance. pub const DMA_ATTR_WRITE_COMBINE: Attrs = Attrs(bindings::DMA_ATTR_WRITE_COMBINE); - /// Lets the platform to avoid creating a kernel virtual mapping for the allocated buffer. - pub const DMA_ATTR_NO_KERNEL_MAPPING: Attrs = Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING); - /// Allows platform code to skip synchronization of the CPU cache for the given buffer assuming /// that it has been already transferred to 'device' domain. pub const DMA_ATTR_SKIP_CPU_SYNC: Attrs = Attrs(bindings::DMA_ATTR_SKIP_CPU_SYNC); From d4d0c91a0afcc021bfbc8845bc536c0de7c8bb5f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 31 Mar 2026 22:58:48 +0200 Subject: [PATCH 023/554] kbuild: rust: allow `clippy::uninlined_format_args` commit 10eea3c147141c90cf409b8df56d245c9d7f88d9 upstream. 
Clippy in Rust 1.88.0 (only) reports [1]: warning: variables can be used directly in the `format!` string --> rust/macros/module.rs:112:23 | 112 | let content = format!("{param}:{content}", param = param, content = content); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args = note: `-W clippy::uninlined-format-args` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::uninlined_format_args)]` help: change this to | 112 - let content = format!("{param}:{content}", param = param, content = content); 112 + let content = format!("{param}:{content}"); warning: variables can be used directly in the `format!` string --> rust/macros/module.rs:198:14 | 198 | t => panic!("Unsupported parameter type {}", t), | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args = note: `-W clippy::uninlined-format-args` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::uninlined_format_args)]` help: change this to | 198 - t => panic!("Unsupported parameter type {}", t), 198 + t => panic!("Unsupported parameter type {t}"), | The reason it only triggers in that version is that the lint was moved from `pedantic` to `style` in Rust 1.88.0 and then back to `pedantic` in Rust 1.89.0 [2][3]. In the first case, the suggestion is fair and a pure simplification, thus we will clean it up separately. To keep the behavior the same across all versions, and since the lint does not work for all macros (e.g. custom ones like `pr_info!`), disable it globally. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
Link: https://lore.kernel.org/rust-for-linux/CANiq72=drAtf3y_DZ-2o4jb6Az9J3Yj4QYwWnbRui4sm4AJD3Q@mail.gmail.com/ [1] Link: https://github.com/rust-lang/rust-clippy/pull/15287 [2] Link: https://github.com/rust-lang/rust-clippy/issues/15151 [3] Link: https://patch.msgid.link/20260331205849.498295-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index f1b9b5849b79b..a3eefbf12ed92 100644 --- a/Makefile +++ b/Makefile @@ -492,6 +492,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::ptr_cast_constness \ -Wclippy::ref_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ + -Aclippy::uninlined_format_args \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ -Wrustdoc::missing_crate_level_docs \ From f51f85c044809fbd39ac8ae07ac99bc43ce32bd5 Mon Sep 17 00:00:00 2001 From: "Lorenzo Stoakes (Oracle)" Date: Fri, 20 Mar 2026 22:39:35 +0000 Subject: [PATCH 024/554] fs: afs: revert mmap_prepare() change commit fbfc6578eaca12daa0c09df1e9ba7f2c657b49da upstream. Partially reverts commit 9d5403b1036c ("fs: convert most other generic_file_*mmap() users to .mmap_prepare()"). This is because the .mmap invocation establishes a refcount, but .mmap_prepare is called at a point where a merge or an allocation failure might happen after the call, which would leak the refcount increment. Functionality is being added to permit the use of .mmap_prepare in this case, but in the interim, we need to fix this. 
Link: https://lkml.kernel.org/r/08804c94e39d9102a3a8fbd12385e8aa079ba1d3.1774045440.git.ljs@kernel.org Fixes: 9d5403b1036c ("fs: convert most other generic_file_*mmap() users to .mmap_prepare()") Signed-off-by: Lorenzo Stoakes (Oracle) Acked-by: Vlastimil Babka (SUSE) Cc: Alexander Shishkin Cc: Alexandre Torgue Cc: Al Viro Cc: Arnd Bergmann Cc: Bodo Stroesser Cc: Christian Brauner Cc: Clemens Ladisch Cc: David Hildenbrand Cc: David Howells Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Jan Kara Cc: Jann Horn Cc: Jonathan Corbet Cc: K. Y. Srinivasan Cc: Liam Howlett Cc: Long Li Cc: Marc Dionne Cc: "Martin K. Petersen" Cc: Maxime Coquelin Cc: Michal Hocko Cc: Mike Rapoport Cc: Miquel Raynal Cc: Pedro Falcato Cc: Richard Weinberger Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vignesh Raghavendra Cc: Vlastimil Babka (SUSE) Cc: Wei Liu Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/afs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index f66a922942849..fc15497608c61 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,7 +19,7 @@ #include #include "internal.h" -static int afs_file_mmap_prepare(struct vm_area_desc *desc); +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = { .llseek = generic_file_llseek, .read_iter = afs_file_read_iter, .write_iter = netfs_file_write_iter, - .mmap_prepare = afs_file_mmap_prepare, + .mmap = afs_file_mmap, .splice_read = afs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = afs_fsync, @@ -492,16 +492,16 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode) /* * Handle setting up a memory mapping on an AFS file. 
*/ -static int afs_file_mmap_prepare(struct vm_area_desc *desc) +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file)); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); int ret; afs_add_open_mmap(vnode); - ret = generic_file_mmap_prepare(desc); + ret = generic_file_mmap(file, vma); if (ret == 0) - desc->vm_ops = &afs_vm_ops; + vma->vm_ops = &afs_vm_ops; else afs_drop_open_mmap(vnode); return ret; From ecb3e4fa31ffa90b1398668e7950a3fdd52b0fc1 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:12 +0100 Subject: [PATCH 025/554] firmware: google: framebuffer: Do not mark framebuffer as busy commit f3850d399de3b6142b02315227ef9e772ed0c302 upstream. Remove the IORESOURCE_BUSY flag from coreboot's framebuffer resource. It prevents simpledrm from successfully requesting the range for its own use; resulting in errors such as [ 2.775430] simple-framebuffer simple-framebuffer.0: [drm] could not acquire memory region [mem 0x80000000-0x80407fff flags 0x80000200] As with other uses of simple-framebuffer, the simple-framebuffer device should only declare its I/O resources, but not actively use them.
Signed-off-by: Thomas Zimmermann Fixes: 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer driver") Acked-by: Tzung-Bi Shih Acked-by: Julius Werner Cc: Samuel Holland Cc: Greg Kroah-Hartman Cc: Tzung-Bi Shih Cc: Brian Norris Cc: Julius Werner Cc: chrome-platform@lists.linux.dev Cc: # v4.18+ Link: https://patch.msgid.link/20260217155836.96267-3-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/framebuffer-coreboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c index c68c9f56370f2..0ad813536202d 100644 --- a/drivers/firmware/google/framebuffer-coreboot.c +++ b/drivers/firmware/google/framebuffer-coreboot.c @@ -67,7 +67,7 @@ static int framebuffer_probe(struct coreboot_device *dev) return -ENODEV; memset(&res, 0, sizeof(res)); - res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res.flags = IORESOURCE_MEM; res.name = "Coreboot Framebuffer"; res.start = fb->physical_address; length = PAGE_ALIGN(fb->y_resolution * fb->bytes_per_line); From 3f8805b8ca23023ad75fcfb54f92e6df5c383221 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 9 Mar 2026 02:57:24 +0000 Subject: [PATCH 026/554] arm64/mm: Enable batched TLB flush in unmap_hotplug_range() commit 48478b9f791376b4b89018d7afdfd06865498f65 upstream. During a memory hot remove operation, both linear and vmemmap mappings for the memory range being removed, get unmapped via unmap_hotplug_range() but mapped pages get freed only for vmemmap mapping. This is just a sequential operation where each table entry gets cleared, followed by a leaf specific TLB flush, and then followed by memory free operation when applicable. This approach was simple and uniform both for vmemmap and linear mappings. But linear mapping might contain CONT marked block memory where it becomes necessary to first clear out all entries in the range before a TLB flush.
This is as per the architecture requirement. Hence batch all TLB flushes during the table tear down walk and finally do it in unmap_hotplug_range(). Prior to this fix, it was hypothetically possible for a speculative access to a higher address in the contiguous block to fill the TLB with shattered entries for the entire contiguous range after a lower address had already been cleared and invalidated. Due to the table entries being shattered, the subsequent TLB invalidation for the higher address would not then clear the TLB entries for the lower address, meaning stale TLB entries could persist. Besides it also helps in improving the performance via TLBI range operation along with reduced synchronization instructions. The time spent executing unmap_hotplug_range() improved 97% measured over a 2GB memory hot removal in KVM guest. This scheme is not applicable during vmemmap mapping tear down where memory needs to be freed and hence a TLB flush is required after clearing out page table entry. 
Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Closes: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/ Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove") Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand (Arm) Reviewed-by: Ryan Roberts Signed-off-by: Ryan Roberts Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ed04c42a826d8..75698900c17df 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1465,10 +1465,14 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr, WARN_ON(!pte_present(pte)); __pte_clear(&init_mm, addr, ptep); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - if (free_mapped) + if (free_mapped) { + /* CONT blocks are not supported in the vmemmap */ + WARN_ON(pte_cont(pte)); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); free_hotplug_page_range(pte_page(pte), PAGE_SIZE, altmap); + } + /* unmap_hotplug_range() flushes TLB for !free_mapped */ } while (addr += PAGE_SIZE, addr < end); } @@ -1489,15 +1493,14 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr, WARN_ON(!pmd_present(pmd)); if (pmd_sect(pmd)) { pmd_clear(pmdp); - - /* - * One TLBI should be sufficient here as the PMD_SIZE - * range is mapped with a single block entry. 
- */ - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - if (free_mapped) + if (free_mapped) { + /* CONT blocks are not supported in the vmemmap */ + WARN_ON(pmd_cont(pmd)); + flush_tlb_kernel_range(addr, addr + PMD_SIZE); free_hotplug_page_range(pmd_page(pmd), PMD_SIZE, altmap); + } + /* unmap_hotplug_range() flushes TLB for !free_mapped */ continue; } WARN_ON(!pmd_table(pmd)); @@ -1522,15 +1525,12 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr, WARN_ON(!pud_present(pud)); if (pud_sect(pud)) { pud_clear(pudp); - - /* - * One TLBI should be sufficient here as the PUD_SIZE - * range is mapped with a single block entry. - */ - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - if (free_mapped) + if (free_mapped) { + flush_tlb_kernel_range(addr, addr + PUD_SIZE); free_hotplug_page_range(pud_page(pud), PUD_SIZE, altmap); + } + /* unmap_hotplug_range() flushes TLB for !free_mapped */ continue; } WARN_ON(!pud_table(pud)); @@ -1560,6 +1560,7 @@ static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr, static void unmap_hotplug_range(unsigned long addr, unsigned long end, bool free_mapped, struct vmem_altmap *altmap) { + unsigned long start = addr; unsigned long next; pgd_t *pgdp, pgd; @@ -1581,6 +1582,9 @@ static void unmap_hotplug_range(unsigned long addr, unsigned long end, WARN_ON(!pgd_present(pgd)); unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap); } while (addr = next, addr < end); + + if (!free_mapped) + flush_tlb_kernel_range(start, end); } static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, From 944a6864aa06d8bd393272515ed0a2a629933cac Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Thu, 12 Mar 2026 03:47:23 -0700 Subject: [PATCH 027/554] mm: migrate: requeue destination folio on deferred split queue commit a2e0c0668a3486f96b86c50e02872c8e94fd4f9c upstream. 
During folio migration, __folio_migrate_mapping() removes the source folio from the deferred split queue, but the destination folio is never re-queued. This causes underutilized THPs to escape the shrinker after NUMA migration, since they silently drop off the deferred split list. Fix this by recording whether the source folio was on the deferred split queue and its partially mapped state before move_to_new_folio() unqueues it, and re-queuing the destination folio after a successful migration if it was. By the time migrate_folio_move() runs, partially mapped folios without a pin have already been split by migrate_pages_batch(). So only two cases remain on the deferred list at this point: 1. Partially mapped folios with a pin (split failed). 2. Fully mapped but potentially underused folios. The recorded partially_mapped state is forwarded to deferred_split_folio() so that the destination folio is correctly re-queued in both cases. Because THPs are removed from the deferred_list, THP shrinker cannot split the underutilized THPs in time. As a result, users will show less free memory than before.
Link: https://lkml.kernel.org/r/20260312104723.1351321-1-usama.arif@linux.dev Fixes: dafff3f4c850 ("mm: split underused THPs") Signed-off-by: Usama Arif Reported-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Zi Yan Acked-by: David Hildenbrand (Arm) Acked-by: SeongJae Park Reviewed-by: Wei Yang Cc: Alistair Popple Cc: Byungchul Park Cc: Gregory Price Cc: "Huang, Ying" Cc: Joshua Hahn Cc: Matthew Brost Cc: Matthew Wilcox (Oracle) Cc: Nico Pache Cc: Rakie Kim Cc: Ying Huang Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index a936623d0b237..ad7af66a4450a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1349,6 +1349,8 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, int rc; int old_page_state = 0; struct anon_vma *anon_vma = NULL; + bool src_deferred_split = false; + bool src_partially_mapped = false; struct list_head *prev; __migrate_folio_extract(dst, &old_page_state, &anon_vma); @@ -1362,6 +1364,12 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, goto out_unlock_both; } + if (folio_order(src) > 1 && + !data_race(list_empty(&src->_deferred_list))) { + src_deferred_split = true; + src_partially_mapped = folio_test_partially_mapped(src); + } + rc = move_to_new_folio(dst, src, mode); if (rc) goto out; @@ -1382,6 +1390,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, if (old_page_state & PAGE_WAS_MAPPED) remove_migration_ptes(src, dst, 0); + /* + * Requeue the destination folio on the deferred split queue if + * the source was on the queue. The source is unqueued in + * __folio_migrate_mapping(), so we recorded the state from + * before move_to_new_folio(). 
+ */ + if (src_deferred_split) + deferred_split_folio(dst, src_partially_mapped); + out_unlock_both: folio_unlock(dst); folio_set_owner_migrate_reason(dst, reason); From 3c636a3edca9c3f180b3079f94fe7e115730d9c6 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Thu, 2 Apr 2026 21:43:27 +0800 Subject: [PATCH 028/554] ocfs2: split transactions in dio completion to avoid credit exhaustion commit d647c5b2fbf81560818dacade360abc8c00a9665 upstream. During ocfs2 dio operations, JBD2 may report warnings via following call trace: ocfs2_dio_end_io_write ocfs2_mark_extent_written ocfs2_change_extent_flag ocfs2_split_extent ocfs2_try_to_merge_extent ocfs2_extend_rotate_transaction ocfs2_extend_trans jbd2__journal_restart start_this_handle output: JBD2: kworker/6:2 wants too many credits credits:5450 rsv_credits:0 max:5449 To prevent exceeding the credits limit, modify ocfs2_dio_end_io_write() to handle extents in a batch of transaction. Additionally, relocate ocfs2_del_inode_from_orphan(). The orphan inode should only be removed from the orphan list after the extent tree update is complete. This ensures that if a crash occurs in the middle of extent tree updates, we won't leave stale blocks beyond EOF. This patch also changes the logic for updating the inode size and removing orphan, making it similar to ext4_dio_write_end_io(). Both operations are performed only when everything looks good. Finally, thanks to Jans and Joseph for providing the bug fix prototype and suggestions. 
Link: https://lkml.kernel.org/r/20260402134328.27334-2-heming.zhao@suse.com Signed-off-by: Heming Zhao Suggested-by: Jan Kara Suggested-by: Joseph Qi Reviewed-by: Jan Kara Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 74 ++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 7a65d5a36a3e1..4ff9f2b64fe95 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -37,6 +37,8 @@ #include "namei.h" #include "sysfile.h" +#define OCFS2_DIO_MARK_EXTENT_BATCH 200 + static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2278,7 +2280,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; loff_t end = offset + bytes; - int ret = 0, credits = 0; + int ret = 0, credits = 0, batch = 0; ocfs2_init_dealloc_ctxt(&dealloc); @@ -2295,18 +2297,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, goto out; } - /* Delete orphan before acquire i_rwsem. */ - if (dwc->dw_orphaned) { - BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); - - end = end > i_size_read(inode) ? 
end : 0; - - ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, - !!end, end); - if (ret < 0) - mlog_errno(ret); - } - down_write(&oi->ip_alloc_sem); di = (struct ocfs2_dinode *)di_bh->b_data; @@ -2327,24 +2317,25 @@ static int ocfs2_dio_end_io_write(struct inode *inode, credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto unlock; - } - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto commit; - } - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { + if (!handle) { + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto unlock; + } + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto commit; + } + } ret = ocfs2_assure_trans_credits(handle, credits); if (ret < 0) { mlog_errno(ret); - break; + goto commit; } ret = ocfs2_mark_extent_written(inode, &et, handle, ue->ue_cpos, 1, @@ -2352,19 +2343,44 @@ static int ocfs2_dio_end_io_write(struct inode *inode, meta_ac, &dealloc); if (ret < 0) { mlog_errno(ret); - break; + goto commit; + } + + if (++batch == OCFS2_DIO_MARK_EXTENT_BATCH) { + ocfs2_commit_trans(osb, handle); + handle = NULL; + batch = 0; } } if (end > i_size_read(inode)) { + if (!handle) { + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto unlock; + } + } ret = ocfs2_set_inode_size(handle, inode, di_bh, end); if (ret < 0) mlog_errno(ret); } + commit: - ocfs2_commit_trans(osb, handle); + if (handle) + ocfs2_commit_trans(osb, handle); unlock: up_write(&oi->ip_alloc_sem); + + /* everything looks good, let's start the cleanup */ + if (!ret && dwc->dw_orphaned) { + BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); + + ret = 
ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0); + if (ret < 0) + mlog_errno(ret); + } ocfs2_inode_unlock(inode, 1); brelse(di_bh); out: From a516d43886623e3cca5fa3446bed8fc7c7982be2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 10 Apr 2026 21:13:43 -0700 Subject: [PATCH 029/554] Input: edt-ft5x06 - fix use-after-free in debugfs teardown commit f5f9e07060519e2287e99019a6de1eb3ebb65c37 upstream. The commit 68743c500c6e ("Input: edt-ft5x06 - use per-client debugfs directory") removed the manual debugfs teardown, relying on the I2C core to handle it. However, this creates a window where debugfs files are still accessible after edt_ft5x06_ts_teardown_debugfs() frees tsdata->raw_buffer. To prevent a use-after-free, protect the freeing of raw_buffer with the device mutex and set raw_buffer to NULL. The debugfs read function already checks if raw_buffer is NULL under the same mutex, so this safely avoids the use-after-free. Fixes: 68743c500c6e ("Input: edt-ft5x06 - use per-client debugfs directory") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/adnJicDh-bTUaWXP@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/edt-ft5x06.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index bf498bd4dea96..4efdb467b6c61 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -829,7 +829,10 @@ static void edt_ft5x06_ts_prepare_debugfs(struct edt_ft5x06_ts_data *tsdata) static void edt_ft5x06_ts_teardown_debugfs(struct edt_ft5x06_ts_data *tsdata) { + guard(mutex)(&tsdata->mutex); + kfree(tsdata->raw_buffer); + tsdata->raw_buffer = NULL; } #else From a02363f71a79b755daa78a70d6b217f9c13c8c85 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 31 Mar 2026 16:42:44 +0900 Subject: [PATCH 030/554] zram: do not forget to endio for partial discard requests commit 
e3668b371329ea036ff022ce8ecc82f8befcf003 upstream. As reported by Qu Wenruo and Avinesh Kumar, the following getconf PAGESIZE 65536 blkdiscard -p 4k /dev/zram0 takes literally forever to complete. zram doesn't support partial discards and just returns immediately w/o doing any discard work in such cases. The problem is that we forget to endio on our way out, so blkdiscard sleeps forever in submit_bio_wait(). Fix this by jumping to end_bio label, which does bio_endio(). Link: https://lore.kernel.org/20260331074255.777019-1-senozhatsky@chromium.org Fixes: 0120dd6e4e20 ("zram: make zram_bio_discard more self-contained") Signed-off-by: Sergey Senozhatsky Reported-by: Qu Wenruo Closes: https://lore.kernel.org/linux-block/92361cd3-fb8b-482e-bc89-15ff1acb9a59@suse.com Tested-by: Qu Wenruo Reported-by: Avinesh Kumar Closes: https://bugzilla.suse.com/show_bug.cgi?id=1256530 Reviewed-by: Christoph Hellwig Cc: Brian Geffon Cc: Jens Axboe Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a430746575312..dc6a53c9166aa 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2308,7 +2308,7 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio) */ if (offset) { if (n <= (PAGE_SIZE - offset)) - return; + goto end_bio; n -= (PAGE_SIZE - offset); index++; @@ -2323,6 +2323,7 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio) n -= PAGE_SIZE; } +end_bio: bio_endio(bio); } From 000134a20bbf89b1152520a2eef71f91fdb83a5b Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 20 Feb 2026 12:47:30 +0300 Subject: [PATCH 031/554] wifi: rtw88: check for PCI upstream bridge existence commit eb101d2abdcccb514ca4fccd3b278dd8267374f6 upstream. pci_upstream_bridge() returns NULL if the device is on a root bus. 
If 8821CE is installed in the system with such a PCI topology, the probing routine will crash. This has probably been unnoticed as 8821CE is mostly supplied in laptops where there is a PCI-to-PCI bridge located upstream from the device. However the card might be installed on a system with different configuration. Check if the bridge does exist for the specific workaround to be applied. Found by Linux Verification Center (linuxtesting.org) with Svace static analysis tool. Fixes: 24f5e38a13b5 ("rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260220094730.49791-1-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtw88/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c index 56b16186d3aa4..ec0a45bfb670e 100644 --- a/drivers/net/wireless/realtek/rtw88/pci.c +++ b/drivers/net/wireless/realtek/rtw88/pci.c @@ -1804,7 +1804,8 @@ int rtw_pci_probe(struct pci_dev *pdev, } /* Disable PCIe ASPM L1 while doing NAPI poll for 8821CE */ - if (rtwdev->chip->id == RTW_CHIP_TYPE_8821C && bridge->vendor == PCI_VENDOR_ID_INTEL) + if (rtwdev->chip->id == RTW_CHIP_TYPE_8821C && + bridge && bridge->vendor == PCI_VENDOR_ID_INTEL) rtwpci->rx_no_aspm = true; rtw_pci_phy_cfg(rtwdev); From 4e179a60a60c0a5aea245e8e67768343c0f070b8 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 6 Feb 2026 14:44:01 -0500 Subject: [PATCH 032/554] wifi: mwifiex: fix use-after-free in mwifiex_adapter_cleanup() commit ae5e95d4157481693be2317e3ffcd84e36010cbb upstream. The mwifiex_adapter_cleanup() function uses timer_delete() (non-synchronous) for the wakeup_timer before the adapter structure is freed. This is incorrect because timer_delete() does not wait for any running timer callback to complete. 
If the wakeup_timer callback (wakeup_timer_fn) is executing when mwifiex_adapter_cleanup() is called, the callback will continue to access adapter fields (adapter->hw_status, adapter->if_ops.card_reset, etc.) which may be freed by mwifiex_free_adapter() called later in the mwifiex_remove_card() path. Use timer_delete_sync() instead to ensure any running timer callback has completed before returning. Fixes: 4636187da60b ("mwifiex: add wakeup timer based recovery mechanism") Cc: stable@vger.kernel.org Signed-off-by: Daniel Hodges Link: https://patch.msgid.link/20260206194401.2346-1-git@danielhodges.dev Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 4820010a86f6b..5902e2c821f28 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -391,7 +391,7 @@ static void mwifiex_invalidate_lists(struct mwifiex_adapter *adapter) static void mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter) { - timer_delete(&adapter->wakeup_timer); + timer_delete_sync(&adapter->wakeup_timer); cancel_delayed_work_sync(&adapter->devdump_work); mwifiex_cancel_all_pending_cmd(adapter); wake_up_interruptible(&adapter->cmd_wait_q.wait); From 7aa27b20e938bfd1b0035f423474b1e26bc7e9c3 Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Tue, 17 Mar 2026 10:44:02 +0530 Subject: [PATCH 033/554] vfio: selftests: Fix VLA initialisation in vfio_pci_irq_set() commit 4f42d716707654134789a0205a050b0d022be948 upstream. C does not permit an initialiser expression on a variable-length array (C99 Section 6.7.9 constraint: "The type of the entity to be initialized shall not be a variable length array type"). 
vfio_pci_irq_set() declared: u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {}; where `count` is a runtime function parameter, making `buf` a VLA. GCC rejects this with (tried with GCC-9.4.0): error: variable-sized object may not be initialized Fix by removing the `= {}` initialiser and inserting an explicit memset() immediately after the declaration. memset() on a VLA is perfectly legal and achieves the same zero-initialisation on all conforming C implementations. Fixes: 19faf6fd969c ("vfio: selftests: Add a helper library for VFIO selftests") Cc: stable@vger.kernel.org Reviewed-by: Dave Jiang Reviewed-by: David Matlack Signed-off-by: Manish Honap Link: https://lore.kernel.org/r/20260317051402.3725670-1-mhonap@nvidia.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vfio/lib/vfio_pci_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index b479a359da121..77ba3fa4e2c89 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -303,10 +303,12 @@ iova_t to_iova(struct vfio_pci_device *device, void *vaddr) static void vfio_pci_irq_set(struct vfio_pci_device *device, u32 index, u32 vector, u32 count, int *fds) { - u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {}; + u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count]; struct vfio_irq_set *irq = (void *)&buf; int *irq_fds = (void *)&irq->data; + memset(buf, 0, sizeof(buf)); + irq->argsz = sizeof(buf); irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER; irq->index = index; From 08da3594c8ba49da00ef4c75ff5a5f7b38fa6cfe Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 14 Apr 2026 14:06:19 -0600 Subject: [PATCH 034/554] vfio/virtio: Convert list_lock from spinlock to mutex commit 903570835f12b7436ca0edb0a9ed351c0349121e upstream. 
The list_lock spinlock with IRQ disabling was copied from the mlx5 vfio-pci variant driver, where it is justified by a hardirq async command completion callback that accesses the protected lists. The virtio driver has no such interrupt context usage; all list_lock acquisitions occur in process context via file read/write operations or state transitions under state_mutex. Convert list_lock to a mutex to be consistent with peer vfio-pci variant drivers (hisilicon, pds, qat, xe) which all use mutexes for equivalent migration data protection. This also fixes a mismatched spin_lock()/spin_unlock_irq() pair in virtiovf_read_device_context_chunk() that could incorrectly enable interrupts. Reported-by: Jinhui Guo Closes: https://lore.kernel.org/all/20260413073603.30538-1-guojinhui.liam@bytedance.com Fixes: 0bbc82e4ec79 ("vfio/virtio: Add support for the basic live migration functionality") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Alex Williamson Reviewed-by: Yishai Hadas Link: https://lore.kernel.org/r/20260414200625.3601509-2-alex.williamson@nvidia.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/virtio/common.h | 2 +- drivers/vfio/pci/virtio/migrate.c | 33 ++++++++++++++++--------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/pci/virtio/common.h b/drivers/vfio/pci/virtio/common.h index c7d7e27af386e..7980f93a8631b 100644 --- a/drivers/vfio/pci/virtio/common.h +++ b/drivers/vfio/pci/virtio/common.h @@ -68,7 +68,7 @@ struct virtiovf_migration_file { enum virtiovf_migf_state state; enum virtiovf_load_state load_state; /* synchronize access to the lists */ - spinlock_t list_lock; + struct mutex list_lock; struct list_head buf_list; struct list_head avail_list; struct virtiovf_data_buffer *buf; diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c index 7dd0ac866461d..405add0adcebb 100644 --- a/drivers/vfio/pci/virtio/migrate.c +++ 
b/drivers/vfio/pci/virtio/migrate.c @@ -142,9 +142,9 @@ virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length) static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf) { - spin_lock_irq(&buf->migf->list_lock); + mutex_lock(&buf->migf->list_lock); list_add_tail(&buf->buf_elm, &buf->migf->avail_list); - spin_unlock_irq(&buf->migf->list_lock); + mutex_unlock(&buf->migf->list_lock); } static int @@ -170,21 +170,21 @@ virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length) INIT_LIST_HEAD(&free_list); - spin_lock_irq(&migf->list_lock); + mutex_lock(&migf->list_lock); list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { list_del_init(&buf->buf_elm); if (buf->allocated_length >= length) { - spin_unlock_irq(&migf->list_lock); + mutex_unlock(&migf->list_lock); goto found; } /* * Prevent holding redundant buffers. Put in a free - * list and call at the end not under the spin lock + * list and call at the end not under the mutex * (&migf->list_lock) to minimize its scope usage. */ list_add(&buf->buf_elm, &free_list); } - spin_unlock_irq(&migf->list_lock); + mutex_unlock(&migf->list_lock); buf = virtiovf_alloc_data_buffer(migf, length); found: @@ -295,6 +295,7 @@ static int virtiovf_release_file(struct inode *inode, struct file *filp) struct virtiovf_migration_file *migf = filp->private_data; virtiovf_disable_fd(migf); + mutex_destroy(&migf->list_lock); mutex_destroy(&migf->lock); kfree(migf); return 0; @@ -308,7 +309,7 @@ virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf, bool found = false; *end_of_data = false; - spin_lock_irq(&migf->list_lock); + mutex_lock(&migf->list_lock); if (list_empty(&migf->buf_list)) { *end_of_data = true; goto end; @@ -329,7 +330,7 @@ virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf, migf->state = VIRTIOVF_MIGF_STATE_ERROR; end: - spin_unlock_irq(&migf->list_lock); + mutex_unlock(&migf->list_lock); return found ? 
buf : NULL; } @@ -369,10 +370,10 @@ static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf, } if (*pos >= vhca_buf->start_pos + vhca_buf->length) { - spin_lock_irq(&vhca_buf->migf->list_lock); + mutex_lock(&vhca_buf->migf->list_lock); list_del_init(&vhca_buf->buf_elm); list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list); - spin_unlock_irq(&vhca_buf->migf->list_lock); + mutex_unlock(&vhca_buf->migf->list_lock); } return done; @@ -554,9 +555,9 @@ virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf, header_buf->length = sizeof(header); header_buf->start_pos = header_buf->migf->max_pos; migf->max_pos += header_buf->length; - spin_lock_irq(&migf->list_lock); + mutex_lock(&migf->list_lock); list_add_tail(&header_buf->buf_elm, &migf->buf_list); - spin_unlock_irq(&migf->list_lock); + mutex_unlock(&migf->list_lock); return 0; } @@ -621,9 +622,9 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, buf->start_pos = buf->migf->max_pos; migf->max_pos += buf->length; - spin_lock(&migf->list_lock); + mutex_lock(&migf->list_lock); list_add_tail(&buf->buf_elm, &migf->buf_list); - spin_unlock_irq(&migf->list_lock); + mutex_unlock(&migf->list_lock); return 0; out_header: @@ -692,7 +693,7 @@ virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev, mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); INIT_LIST_HEAD(&migf->avail_list); - spin_lock_init(&migf->list_lock); + mutex_init(&migf->list_lock); migf->virtvdev = virtvdev; lockdep_assert_held(&virtvdev->state_mutex); @@ -1082,7 +1083,7 @@ virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev) mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); INIT_LIST_HEAD(&migf->avail_list); - spin_lock_init(&migf->list_lock); + mutex_init(&migf->list_lock); buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE); if (IS_ERR(buf)) { From 7b436ade16cc81095d79b79f8efa3af0a4f5c5a2 Mon Sep 17 00:00:00 2001 From: Alex 
Williamson Date: Fri, 17 Apr 2026 14:27:57 -0600 Subject: [PATCH 035/554] vfio/cdx: Serialize VFIO_DEVICE_SET_IRQS with a per-device mutex commit 670e8864b1a218d72f08db40d0103adf38fa1d9b upstream. vfio_cdx_set_msi_trigger() reads vdev->config_msi and operates on the vdev->cdx_irqs array based on its value, but provides no serialization against concurrent VFIO_DEVICE_SET_IRQS ioctls. Two callers can race such that one observes config_msi as set while another clears it and frees cdx_irqs via vfio_cdx_msi_disable(), resulting in a use-after-free of the cdx_irqs array. Add a cdx_irqs_lock mutex to struct vfio_cdx_device and acquire it in vfio_cdx_set_msi_trigger(), which is the single chokepoint through which all updates to config_msi, cdx_irqs, and msi_count flow, covering both the ioctl path and the close-device cleanup path. This keeps the test of config_msi atomic with the subsequent enable, disable, or trigger operations. Drop the pre-call !cdx_irqs test from vfio_cdx_irqs_cleanup() as part of this change: the optimization it provided is redundant with the !config_msi early-return inside vfio_cdx_msi_disable(), and leaving the test in place would be an unsynchronized read of state the new lock is meant to protect. 
Fixes: 848e447e000c ("vfio/cdx: add interrupt support") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Alex Williamson Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/r/20260417202800.88287-3-alex.williamson@nvidia.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/cdx/intr.c | 9 ++------- drivers/vfio/cdx/main.c | 19 +++++++++++++++++++ drivers/vfio/cdx/private.h | 3 +++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c index 986fa2a45fa40..371a8ff01a268 100644 --- a/drivers/vfio/cdx/intr.c +++ b/drivers/vfio/cdx/intr.c @@ -152,6 +152,8 @@ static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev, if (start + count > cdx_dev->num_msi) return -EINVAL; + guard(mutex)(&vdev->cdx_irqs_lock); + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) { vfio_cdx_msi_disable(vdev); return 0; @@ -206,12 +208,5 @@ int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev, /* Free All IRQs for the given device */ void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev) { - /* - * Device does not support any interrupt or the interrupts - * were not configured - */ - if (!vdev->cdx_irqs) - return; - vfio_cdx_set_msi_trigger(vdev, 0, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL); } diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c index 5dd5f5ad76865..a9639730cfdf5 100644 --- a/drivers/vfio/cdx/main.c +++ b/drivers/vfio/cdx/main.c @@ -8,6 +8,23 @@ #include "private.h" +static int vfio_cdx_init_dev(struct vfio_device *core_vdev) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + + mutex_init(&vdev->cdx_irqs_lock); + return 0; +} + +static void vfio_cdx_release_dev(struct vfio_device *core_vdev) +{ + struct vfio_cdx_device *vdev = + container_of(core_vdev, struct vfio_cdx_device, vdev); + + mutex_destroy(&vdev->cdx_irqs_lock); +} + static int vfio_cdx_open_device(struct vfio_device *core_vdev) { struct 
vfio_cdx_device *vdev = @@ -281,6 +298,8 @@ static int vfio_cdx_mmap(struct vfio_device *core_vdev, static const struct vfio_device_ops vfio_cdx_ops = { .name = "vfio-cdx", + .init = vfio_cdx_init_dev, + .release = vfio_cdx_release_dev, .open_device = vfio_cdx_open_device, .close_device = vfio_cdx_close_device, .ioctl = vfio_cdx_ioctl, diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h index 172e48caa3a06..94374b5fc9899 100644 --- a/drivers/vfio/cdx/private.h +++ b/drivers/vfio/cdx/private.h @@ -6,6 +6,8 @@ #ifndef VFIO_CDX_PRIVATE_H #define VFIO_CDX_PRIVATE_H +#include + #define VFIO_CDX_OFFSET_SHIFT 40 static inline u64 vfio_cdx_index_to_offset(u32 index) @@ -31,6 +33,7 @@ struct vfio_cdx_region { struct vfio_cdx_device { struct vfio_device vdev; struct vfio_cdx_region *regions; + struct mutex cdx_irqs_lock; struct vfio_cdx_irq *cdx_irqs; u32 flags; #define BME_SUPPORT BIT(0) From 5d6c349c9823eb819fed8b537b088cf38126018c Mon Sep 17 00:00:00 2001 From: Prasanna Kumar T S M Date: Fri, 17 Apr 2026 14:27:56 -0600 Subject: [PATCH 036/554] vfio/cdx: Fix NULL pointer dereference in interrupt trigger path commit 5ea5880764cbb164afb17a62e76ca75dc371409d upstream. Add validation to ensure MSI is configured before accessing cdx_irqs array in vfio_cdx_set_msi_trigger(). Without this check, userspace can trigger a NULL pointer dereference by calling VFIO_DEVICE_SET_IRQS with VFIO_IRQ_SET_DATA_BOOL or VFIO_IRQ_SET_DATA_NONE flags before ever setting up interrupts via VFIO_IRQ_SET_DATA_EVENTFD. The vfio_cdx_msi_enable() function allocates the cdx_irqs array and sets config_msi to 1 only when called through the EVENTFD path. The trigger loop (for DATA_BOOL/DATA_NONE) assumed this had already been done, but there was no enforcement of this call ordering. This matches the protection used in the PCI VFIO driver where vfio_pci_set_msi_trigger() checks irq_is() before the trigger loop. 
Fixes: 848e447e000c ("vfio/cdx: add interrupt support") Cc: stable@vger.kernel.org Signed-off-by: Prasanna Kumar T S M Acked-by: Nipun Gupta Signed-off-by: Alex Williamson Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/r/20260417202800.88287-2-alex.williamson@nvidia.com Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/cdx/intr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c index 371a8ff01a268..a588d6fc478bb 100644 --- a/drivers/vfio/cdx/intr.c +++ b/drivers/vfio/cdx/intr.c @@ -177,6 +177,10 @@ static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev, return ret; } + /* Ensure MSI is configured before accessing cdx_irqs */ + if (!vdev->config_msi) + return -EINVAL; + for (i = start; i < start + count; i++) { if (!vdev->cdx_irqs[i].trigger) continue; From 2810fc760aaf9d60aa2047e90e1ca5cd938062b0 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 8 Apr 2026 03:01:02 -0400 Subject: [PATCH 037/554] um: drivers: call kernel_strrchr() explicitly in cow_user.c commit 91e901c65b4da02a6fd543e3f0049829ae9645b7 upstream. Building ARCH=um on glibc >= 2.43 fails: arch/um/drivers/cow_user.c: error: implicit declaration of function 'strrchr' [-Wimplicit-function-declaration] glibc 2.43's C23 const-preserving strrchr() macro does not survive UML's global -Dstrrchr=kernel_strrchr remap from arch/um/Makefile. Call kernel_strrchr() directly in cow_user.c so the source no longer depends on the -D rewrite. 
Fixes: 2c51a4bc0233 ("um: fix strrchr() problems") Suggested-by: Johannes Berg Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260408070102.2325572-1-michael.bommarito@gmail.com [remove unnecessary 'extern'] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/cow_user.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 29b46581ddd13..dc1d1bcd85ec2 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -15,6 +15,12 @@ #include "cow.h" #include "cow_sys.h" +/* + * arch/um/Makefile remaps strrchr to kernel_strrchr; call the kernel + * name directly to avoid glibc >= 2.43's C23 strrchr macro. + */ +char *kernel_strrchr(const char *, int); + #define PATH_LEN_V1 256 /* unsigned time_t works until year 2106 */ @@ -153,7 +159,7 @@ static int absolutize(char *to, int size, char *from) errno); return -1; } - slash = strrchr(from, '/'); + slash = kernel_strrchr(from, '/'); if (slash != NULL) { *slash = '\0'; if (chdir(from)) { From 75f8f3c3e09122270986de9d7aa347d701676761 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Apr 2026 15:55:19 +0200 Subject: [PATCH 038/554] thermal: core: Fix thermal zone governor cleanup issues commit 41ff66baf81c6541f4f985dd7eac4494d03d9440 upstream. If thermal_zone_device_register_with_trips() fails after adding a thermal governor to the thermal zone being registered, the governor is not removed from it as appropriate which may lead to a memory leak. In turn, thermal_zone_device_unregister() calls thermal_set_governor() without acquiring the thermal zone lock beforehand which may race with a governor update via sysfs and may lead to a use-after-free in that case. 
Address these issues by adding two thermal_set_governor() calls, one to thermal_release() to remove the governor from the given thermal zone, and one to the thermal zone registration error path to cover failures preceding the thermal zone device registration. Fixes: e33df1d2f3a0 ("thermal: let governors have private data for each thermal zone") Cc: All applicable Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/5092923.31r3eYUQgx@rafael.j.wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index f28c15dd0b926..61e6dc81dc923 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -964,6 +964,7 @@ static void thermal_release(struct device *dev) sizeof("thermal_zone") - 1)) { tz = to_thermal_zone(dev); thermal_zone_destroy_device_groups(tz); + thermal_set_governor(tz, NULL); mutex_destroy(&tz->lock); complete(&tz->removal); } else if (!strncmp(dev_name(dev), "cooling_device", @@ -1607,8 +1608,10 @@ thermal_zone_device_register_with_trips(const char *type, /* sys I/F */ /* Add nodes that are always present via .groups */ result = thermal_zone_create_device_groups(tz); - if (result) + if (result) { + thermal_set_governor(tz, NULL); goto remove_id; + } result = device_register(&tz->device); if (result) @@ -1721,8 +1724,6 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) cancel_delayed_work_sync(&tz->poll_queue); - thermal_set_governor(tz, NULL); - thermal_thresholds_exit(tz); thermal_remove_hwmon_sysfs(tz); ida_free(&thermal_tz_ida, tz->id); From 132e47030b0b5e398e0da6c59df5a5dae9b52cff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 24 Mar 2026 09:23:22 +0100 Subject: [PATCH 039/554] spi: imx: fix use-after-free on unbind commit 1c78c2002380a1fe31bfb01a3d5f29809e55a096 upstream. 
The SPI subsystem frees the controller and any subsystem allocated driver data as part of deregistration (unless the allocation is device managed). Take another reference before deregistering the controller so that the driver data is not freed until the driver is done with it. Fixes: 307c897db762 ("spi: spi-imx: replace struct spi_imx_data::bitbang by pointer to struct spi_controller") Cc: stable@vger.kernel.org # 5.19 Acked-by: Marc Kleine-Budde Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260324082326.901043-2-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index bbf1fd4fe1e92..dc7ffa8cb2878 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1965,6 +1965,8 @@ static void spi_imx_remove(struct platform_device *pdev) struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller); int ret; + spi_controller_get(controller); + spi_unregister_controller(controller); ret = pm_runtime_get_sync(spi_imx->dev); @@ -1978,6 +1980,8 @@ static void spi_imx_remove(struct platform_device *pdev) pm_runtime_disable(spi_imx->dev); spi_imx_sdma_exit(spi_imx); + + spi_controller_put(controller); } static int spi_imx_runtime_resume(struct device *dev) From ff8a7996dc8bf433efe2126ffdaee5b374a89e30 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 11:43:04 +0100 Subject: [PATCH 040/554] spi: ch341: fix memory leaks on probe failures commit b99e3ddb91b499d920e63a2daff8880be68cfe9e upstream. Make sure to deregister the controller, disable pins, and kill and free the RX URB on probe failures to mirror disconnect and avoid memory leaks and use-after-free. Also add an explicit URB kill on disconnect for symmetry (even if that is not strictly required as USB core would have stopped it in the current setup). 
Fixes: 8846739f52af ("spi: add ch341a usb2spi driver") Cc: stable@vger.kernel.org # 6.11 Cc: Johannes Thumshirn Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260327104305.1309915-2-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ch341.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-ch341.c b/drivers/spi/spi-ch341.c index 79d2f9ab4ef03..ded0935662605 100644 --- a/drivers/spi/spi-ch341.c +++ b/drivers/spi/spi-ch341.c @@ -173,17 +173,17 @@ static int ch341_probe(struct usb_interface *intf, ch341->tx_buf = devm_kzalloc(&udev->dev, CH341_PACKET_LENGTH, GFP_KERNEL); - if (!ch341->tx_buf) - return -ENOMEM; + if (!ch341->tx_buf) { + ret = -ENOMEM; + goto err_free_urb; + } usb_fill_bulk_urb(ch341->rx_urb, udev, ch341->read_pipe, ch341->rx_buf, ch341->rx_len, ch341_recv, ch341); ret = usb_submit_urb(ch341->rx_urb, GFP_KERNEL); - if (ret) { - usb_free_urb(ch341->rx_urb); - return -ENOMEM; - } + if (ret) + goto err_free_urb; ctrl->bus_num = -1; ctrl->mode_bits = SPI_CPHA; @@ -195,21 +195,34 @@ static int ch341_probe(struct usb_interface *intf, ret = ch341_config_stream(ch341); if (ret) - return ret; + goto err_kill_urb; ret = ch341_enable_pins(ch341, true); if (ret) - return ret; + goto err_kill_urb; ret = spi_register_controller(ctrl); if (ret) - return ret; + goto err_disable_pins; ch341->spidev = spi_new_device(ctrl, &chip); - if (!ch341->spidev) - return -ENOMEM; + if (!ch341->spidev) { + ret = -ENOMEM; + goto err_unregister; + } return 0; + +err_unregister: + spi_unregister_controller(ctrl); +err_disable_pins: + ch341_enable_pins(ch341, false); +err_kill_urb: + usb_kill_urb(ch341->rx_urb); +err_free_urb: + usb_free_urb(ch341->rx_urb); + + return ret; } static void ch341_disconnect(struct usb_interface *intf) @@ -219,6 +232,7 @@ static void ch341_disconnect(struct usb_interface *intf) spi_unregister_device(ch341->spidev); 
spi_unregister_controller(ch341->ctrl); ch341_enable_pins(ch341, false); + usb_kill_urb(ch341->rx_urb); usb_free_urb(ch341->rx_urb); } From efc52947247a21bbf79059539bbbd40f4ea76f00 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 29 Apr 2026 18:22:22 +0100 Subject: [PATCH 041/554] mm: call ->free_folio() directly in folio_unmap_invalidate() commit 615d9bb2ccad42f9e21d837431e401db2e471195 upstream. We can only call filemap_free_folio() if we have a reference to (or hold a lock on) the mapping. Otherwise, we've already removed the folio from the mapping so it no longer pins the mapping and the mapping can be removed, causing a use-after-free when accessing mapping->a_ops. Follow the same pattern as __remove_mapping() and load the free_folio function pointer before dropping the lock on the mapping. That lets us make filemap_free_folio() static as this was the only caller outside filemap.c. Link: https://lore.kernel.org/20260413184314.3419945-1-willy@infradead.org Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Google Big Sleep Cc: Jens Axboe Cc: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/filemap.c | 3 ++- mm/internal.h | 1 - mm/truncate.c | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 91dcfe14a67b7..e3339cf37a1f0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -227,7 +227,8 @@ void __filemap_remove_folio(struct folio *folio, void *shadow) page_cache_delete(mapping, folio, shadow); } -void filemap_free_folio(struct address_space *mapping, struct folio *folio) +static void filemap_free_folio(const struct address_space *mapping, + struct folio *folio) { void (*free_folio)(struct folio *); diff --git a/mm/internal.h b/mm/internal.h index c80c6f566c2d9..4cf6a12ff491d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -471,7 +471,6 @@ unsigned find_lock_entries(struct 
address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); -void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct address_space *mapping, struct folio *folio); bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end); diff --git a/mm/truncate.c b/mm/truncate.c index 3c5a50ae32741..4bf64fd610c94 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -622,6 +622,7 @@ static int folio_launder(struct address_space *mapping, struct folio *folio) int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, gfp_t gfp) { + void (*free_folio)(struct folio *); int ret; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); @@ -648,9 +649,12 @@ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); + free_folio = mapping->a_ops->free_folio; spin_unlock(&mapping->host->i_lock); - filemap_free_folio(mapping, folio); + if (free_folio) + free_folio(folio); + folio_put_refs(folio, folio_nr_pages(folio)); return 1; failed: xa_unlock_irq(&mapping->i_pages); From ebc235675f24b0e3f8bc92b8419471d42f837d8f Mon Sep 17 00:00:00 2001 From: Douya Le Date: Sun, 19 Apr 2026 16:52:59 +0800 Subject: [PATCH 042/554] crypto: algif_aead - snapshot IV for async AEAD requests commit 5aa58c3a572b3e3b6c786953339f7978b845cc52 upstream. AF_ALG AEAD AIO requests currently use the socket-wide IV buffer during request processing. For async requests, later socket activity can update that shared state before the original request has fully completed, which can lead to inconsistent IV handling. Snapshot the IV into per-request storage when preparing the AEAD request, so in-flight operations no longer depend on mutable socket state. 
Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Co-developed-by: Luxing Yin Signed-off-by: Luxing Yin Tested-by: Yucheng Lu Signed-off-by: Douya Le Signed-off-by: Ren Wei Signed-off-by: Herbert Xu Cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- crypto/algif_aead.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index f8bd45f7dc839..cb651ab58d629 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -72,8 +72,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, struct af_alg_ctx *ctx = ask->private; struct crypto_aead *tfm = pask->private; unsigned int as = crypto_aead_authsize(tfm); + unsigned int ivsize = crypto_aead_ivsize(tfm); struct af_alg_async_req *areq; struct scatterlist *rsgl_src, *tsgl_src = NULL; + void *iv; int err = 0; size_t used = 0; /* [in] TX bufs to be en/decrypted */ size_t outlen = 0; /* [out] RX bufs produced by kernel */ @@ -125,10 +127,14 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* Allocate cipher request for current operation. 
*/ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + - crypto_aead_reqsize(tfm)); + crypto_aead_reqsize(tfm) + ivsize); if (IS_ERR(areq)) return PTR_ERR(areq); + iv = (u8 *)aead_request_ctx(&areq->cra_u.aead_req) + + crypto_aead_reqsize(tfm); + memcpy(iv, ctx->iv, ivsize); + /* convert iovecs of output buffers into RX SGL */ err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages); if (err) @@ -187,7 +193,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* Initialize the crypto operation */ aead_request_set_crypt(&areq->cra_u.aead_req, tsgl_src, - areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv); + areq->first_rsgl.sgl.sgt.sgl, used, iv); aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); aead_request_set_tfm(&areq->cra_u.aead_req, tfm); From 77d55bc8675ee851ed639dc9be77325a8024cf67 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Apr 2026 17:00:50 +0800 Subject: [PATCH 043/554] crypto: pcrypt - Fix handling of MAY_BACKLOG requests commit 915b692e6cb723aac658c25eb82c58fd81235110 upstream. MAY_BACKLOG requests can return EBUSY. Handle them by checking for that value and filtering out EINPROGRESS notifications. 
Reported-by: Yiming Qian Fixes: 5a1436beec57 ("crypto: pcrypt - call the complete function on error") Signed-off-by: Herbert Xu Cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- crypto/pcrypt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index c3a9d4f2995c7..ed0feaba23832 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -69,6 +69,9 @@ static void pcrypt_aead_done(void *data, int err) struct pcrypt_request *preq = aead_request_ctx(req); struct padata_priv *padata = pcrypt_request_padata(preq); + if (err == -EINPROGRESS) + return; + padata->info = err; padata_do_serial(padata); @@ -82,7 +85,7 @@ static void pcrypt_aead_enc(struct padata_priv *padata) ret = crypto_aead_encrypt(req); - if (ret == -EINPROGRESS) + if (ret == -EINPROGRESS || ret == -EBUSY) return; padata->info = ret; @@ -133,7 +136,7 @@ static void pcrypt_aead_dec(struct padata_priv *padata) ret = crypto_aead_decrypt(req); - if (ret == -EINPROGRESS) + if (ret == -EINPROGRESS || ret == -EBUSY) return; padata->info = ret; From 824897e69d7ae94f9a65790b74e8e37032e3393c Mon Sep 17 00:00:00 2001 From: Swamil Jain Date: Wed, 15 Apr 2026 16:34:09 +0530 Subject: [PATCH 044/554] dt-bindings: display: ti, am65x-dss: Fix AM62L DSS reg and clock constraints commit 9c469240997584449cfac51a75d1d3d71968c76f upstream. The AM62L DSS [1] support incorrectly used the same register and clock constraints as AM65x, but AM62L has a single video port Fix this by adding conditional constraints that properly define the register regions and clocks for AM62L DSS (single video port) versus other AM65x variants (dual video port). 
[1]: Section 12.7 (Display Subsystem and Peripherals) Link : https://www.ti.com/lit/pdf/sprujb4 Fixes: cb8d4323302c ("dt-bindings: display: ti,am65x-dss: Add support for AM62L DSS") Cc: stable@vger.kernel.org Signed-off-by: Swamil Jain Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260415110409.2577633-1-s-jain1@ti.com Signed-off-by: Rob Herring (Arm) Signed-off-by: Greg Kroah-Hartman --- .../bindings/display/ti/ti,am65x-dss.yaml | 70 ++++++++++++++----- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml index 361e9cae6896c..35c795f41e5fa 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml +++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml @@ -36,34 +36,50 @@ properties: reg: description: Addresses to each DSS memory region described in the SoC's TRM. - items: - - description: common DSS register area - - description: VIDL1 light video plane - - description: VID video plane - - description: OVR1 overlay manager for vp1 - - description: OVR2 overlay manager for vp2 - - description: VP1 video port 1 - - description: VP2 video port 2 - - description: common1 DSS register area + oneOf: + - items: + - description: common DSS register area + - description: VIDL1 light video plane + - description: VID video plane + - description: OVR1 overlay manager for vp1 + - description: OVR2 overlay manager for vp2 + - description: VP1 video port 1 + - description: VP2 video port 2 + - description: common1 DSS register area + - items: + - description: common DSS register area + - description: VIDL1 light video plane + - description: OVR1 overlay manager for vp1 + - description: VP1 video port 1 + - description: common1 DSS register area reg-names: - items: - - const: common - - const: vidl1 - - const: vid - - const: ovr1 - - const: ovr2 - - const: vp1 - - const: vp2 - - const: common1 + 
oneOf: + - items: + - const: common + - const: vidl1 + - const: vid + - const: ovr1 + - const: ovr2 + - const: vp1 + - const: vp2 + - const: common1 + - items: + - const: common + - const: vidl1 + - const: ovr1 + - const: vp1 + - const: common1 clocks: + minItems: 2 items: - description: fck DSS functional clock - description: vp1 Video Port 1 pixel clock - description: vp2 Video Port 2 pixel clock clock-names: + minItems: 2 items: - const: fck - const: vp1 @@ -180,6 +196,24 @@ allOf: ports: properties: port@1: false + reg: + maxItems: 5 + reg-names: + maxItems: 5 + clocks: + maxItems: 2 + clock-names: + maxItems: 2 + else: + properties: + reg: + minItems: 8 + reg-names: + minItems: 8 + clocks: + minItems: 3 + clock-names: + minItems: 3 - if: properties: From 7f0f0926f3010b10cff5e93446258f971e42f2fd Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 9 Apr 2026 02:22:33 +0000 Subject: [PATCH 045/554] of: unittest: fix use-after-free in of_unittest_changeset() commit faecdd423c27f0d6090156a435ba9dbbac0eaddb upstream. The variable 'parent' is assigned the value of 'nchangeset' earlier in the function, meaning both point to the same struct device_node. The call to of_node_put(nchangeset) can decrement the reference count to zero and free the node if there are no other holders. After that, the code still uses 'parent' to check for the presence of a property and to read a string property, leading to a use-after-free. Fix this by moving the of_node_put() call after the last access to 'parent', avoiding the UAF. 
Fixes: 1c668ea65506 ("of: unittest: Use of_property_present()") Cc: stable@vger.kernel.org Signed-off-by: Wentao Liang Link: https://patch.msgid.link/20260409022233.418103-1-vulab@iscas.ac.cn Signed-off-by: Rob Herring (Arm) Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 9c184e93f50c6..f19abb42db40d 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -896,8 +896,6 @@ static void __init of_unittest_changeset(void) unittest(!of_changeset_apply(&chgset), "apply failed\n"); - of_node_put(nchangeset); - /* Make sure node names are constructed correctly */ unittest((np = of_find_node_by_path("/testcase-data/changeset/n2/n21")), "'%pOF' not added\n", n21); @@ -919,6 +917,7 @@ static void __init of_unittest_changeset(void) if (!ret) unittest(strcmp(propstr, "hello") == 0, "original value not in updated property after revert"); + of_node_put(nchangeset); of_changeset_destroy(&chgset); of_node_put(n1); From 5b6122a67a295f8a08b7c18d908a1bd974dfaec8 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 9 Apr 2026 03:48:59 +0000 Subject: [PATCH 046/554] of: unittest: fix use-after-free in testdrv_probe() commit 07fd339b2c253205794bea5d9b4b7548a4546c56 upstream. The function testdrv_probe() retrieves the device_node from the PCI device, applies an overlay, and then immediately calls of_node_put(dn). This releases the reference held by the PCI core, potentially freeing the node if the reference count drops to zero. Later, the same freed pointer 'dn' is passed to of_platform_default_populate(), leading to a use-after-free. The reference to pdev->dev.of_node is owned by the device model and should not be released by the driver. Remove the erroneous of_node_put() to prevent premature freeing. 
Fixes: 26409dd04589 ("of: unittest: Add pci_dt_testdrv pci driver") Cc: stable@vger.kernel.org Signed-off-by: Wentao Liang Link: https://patch.msgid.link/20260409034859.429071-1-vulab@iscas.ac.cn Signed-off-by: Rob Herring (Arm) Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index f19abb42db40d..02b780b6e8e25 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -4202,7 +4202,6 @@ static int testdrv_probe(struct pci_dev *pdev, const struct pci_device_id *id) size = info->dtbo_end - info->dtbo_begin; ret = of_overlay_fdt_apply(info->dtbo_begin, size, &ovcs_id, dn); - of_node_put(dn); if (ret) return ret; From b6cb07f02253bdefd2339e57eaa1428a7b28cd0f Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Fri, 10 Apr 2026 00:25:41 +0000 Subject: [PATCH 047/554] hwmon: (powerz) Fix missing usb_kill_urb() on signal interrupt commit b66437cb20a2d9ef201f40b675569f8ea7787c9f upstream. wait_for_completion_interruptible_timeout() returns -ERESTARTSYS when interrupted. This needs to abort the URB and return an error. No data has been received from the device so any reads from the transfer buffer are invalid. The original code tests !ret, which only catches the timeout case (0). On signal delivery (-ERESTARTSYS), !ret is false so the function skips usb_kill_urb() and falls through to read from the unfilled transfer buffer. Fix by capturing the return value into a long (matching the function return type) and handling signal (negative) and timeout (zero) cases with separate checks that both call usb_kill_urb() before returning. 
Fixes: 4381a36abdf1c ("hwmon: add POWER-Z driver") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260410002521.422645-3-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/powerz.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/powerz.c b/drivers/hwmon/powerz.c index a75b941bd6e2f..96438f5f05d48 100644 --- a/drivers/hwmon/powerz.c +++ b/drivers/hwmon/powerz.c @@ -106,6 +106,7 @@ static void powerz_usb_cmd_complete(struct urb *urb) static int powerz_read_data(struct usb_device *udev, struct powerz_priv *priv) { + long rc; int ret; if (!priv->urb) @@ -127,8 +128,14 @@ static int powerz_read_data(struct usb_device *udev, struct powerz_priv *priv) if (ret) return ret; - if (!wait_for_completion_interruptible_timeout - (&priv->completion, msecs_to_jiffies(5))) { + rc = wait_for_completion_interruptible_timeout(&priv->completion, + msecs_to_jiffies(5)); + if (rc < 0) { + usb_kill_urb(priv->urb); + return rc; + } + + if (rc == 0) { usb_kill_urb(priv->urb); return -EIO; } From b6e61356ad24987be40bf25369d22dd8dd00a513 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 23 Mar 2026 00:04:08 +0800 Subject: [PATCH 048/554] EDAC/versalnet: Fix device_node leak in mc_probe() commit 5c709b376460ff322580c41600e31c02f7cc0307 upstream. of_parse_phandle() returns a device_node reference that must be released with of_node_put(). The original code never freed r5_core_node on any exit path, causing a memory leak. Fix this by using the automatic cleanup attribute __free(device_node) which ensures of_node_put() is called when the variable goes out of scope. 
Fixes: d5fe2fec6c40 ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Felix Gu Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shubhrajyoti Datta Cc: Link: https://patch.msgid.link/20260323-versalnet-v1-1-4ab3012635ef@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/versalnet_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 1a1092793092a..dce08dc7f32e7 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -868,12 +868,12 @@ static void remove_versalnet(struct mc_priv *priv) static int mc_probe(struct platform_device *pdev) { - struct device_node *r5_core_node; struct mc_priv *priv; struct rproc *rp; int rc; - r5_core_node = of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0); + struct device_node *r5_core_node __free(device_node) = + of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0); if (!r5_core_node) { dev_err(&pdev->dev, "amd,rproc: invalid phandle\n"); return -EINVAL; From e0a621459c61384fe648a13287cb1a6dee07d853 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Sat, 28 Feb 2026 16:09:25 +0800 Subject: [PATCH 049/554] PCI: imx6: Skip waiting for L2/L3 Ready on i.MX6SX commit 5f73cf1db829c21b7fd44a8d2587cd395b1b2d76 upstream. On i.MX6SX, the LTSSM registers become inaccessible after the PME_Turn_Off message is sent to the link. So there is no way to verify whether the link has entered L2/L3 Ready state or not. Hence, set IMX_PCIE_FLAG_SKIP_L23_READY flag for i.MX6SX SoC to skip the L2/L3 Ready state polling and let the DWC core wait for 10ms after sending the PME_Turn_Off message as per the PCIe spec r6.0, sec 5.3.3.2.1. 
Fixes: a528d1a72597 ("PCI: imx6: Use DWC common suspend resume method") Signed-off-by: Richard Zhu [mani: commit log] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260228080925.1558395-1-hongxing.zhu@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-imx6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index a42164c870548..3984c16dbec39 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1844,6 +1844,7 @@ static const struct imx_pcie_drvdata drvdata[] = { .variant = IMX6SX, .flags = IMX_PCIE_FLAG_IMX_PHY | IMX_PCIE_FLAG_SPEED_CHANGE_WORKAROUND | + IMX_PCIE_FLAG_SKIP_L23_READY | IMX_PCIE_FLAG_SUPPORTS_SUSPEND, .gpr = "fsl,imx6q-iomuxc-gpr", .ltssm_off = IOMUXC_GPR12, From fdc150dac1adb9a98be9d6956cff0348838b024a Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Fri, 6 Mar 2026 14:59:50 +0800 Subject: [PATCH 050/554] media: amphion: Fix race between m2m job_abort and device_run commit 8cd35ceadcfc8c5da2eb7f7ce24525ce9d4ee62e upstream. Fix kernel panic caused by race condition where v4l2_m2m_ctx_release() frees m2m_ctx while v4l2_m2m_try_run() is about to call device_run with the same context. Race sequence: v4l2_m2m_try_run(): v4l2_m2m_ctx_release(): lock/unlock v4l2_m2m_cancel_job() job_abort() v4l2_m2m_job_finish() kfree(m2m_ctx) <- frees ctx device_run() <- use-after-free crash at 0x538 Crash trace: Unable to handle kernel read from unreadable memory at virtual address 0000000000000538 v4l2_m2m_try_run+0x78/0x138 v4l2_m2m_device_run_work+0x14/0x20 The amphion vpu driver does not rely on the m2m framework's device_run callback to perform encode/decode operations. 
Fix the race by preventing m2m framework job scheduling entirely: - Add job_ready callback returning 0 (no jobs ready for m2m framework) - Remove job_abort callback to avoid the race condition Fixes: 3cd084519c6f ("media: amphion: add vpu v4l2 m2m support") Cc: stable@vger.kernel.org Signed-off-by: Ming Qian Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/amphion/vpu_v4l2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index 1fb887b9098c6..369e7b1e61d46 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -448,17 +448,14 @@ static void vpu_m2m_device_run(void *priv) { } -static void vpu_m2m_job_abort(void *priv) +static int vpu_m2m_job_ready(void *priv) { - struct vpu_inst *inst = priv; - struct v4l2_m2m_ctx *m2m_ctx = inst->fh.m2m_ctx; - - v4l2_m2m_job_finish(m2m_ctx->m2m_dev, m2m_ctx); + return 0; } static const struct v4l2_m2m_ops vpu_m2m_ops = { .device_run = vpu_m2m_device_run, - .job_abort = vpu_m2m_job_abort + .job_ready = vpu_m2m_job_ready, }; static int vpu_vb2_queue_setup(struct vb2_queue *vq, From 654c818a69c21d2bea4e8fd9eae7da865df9a5c8 Mon Sep 17 00:00:00 2001 From: Ziqing Chen Date: Tue, 14 Apr 2026 21:24:37 +0800 Subject: [PATCH 051/554] ALSA: control: Validate buf_len before strnlen() in snd_ctl_elem_init_enum_names() commit e0da8a8cac74f4b9f577979d131f0d2b88a84487 upstream. snd_ctl_elem_init_enum_names() advances pointer p through the names buffer while decrementing buf_len. If buf_len reaches zero but items remain, the next iteration calls strnlen(p, 0). While strnlen(p, 0) returns 0 and would hit the existing name_len == 0 error path, CONFIG_FORTIFY_SOURCE's fortified strnlen() first checks maxlen against __builtin_dynamic_object_size(). 
When Clang loses track of p's object size inside the loop, this triggers a BRK exception panic before the return value is examined. Add a buf_len == 0 guard at the loop entry to prevent calling fortified strnlen() on an exhausted buffer. Found by kernel fuzz testing through Xiaomi Smartphone. Fixes: 8d448162bda5 ("ALSA: control: add support for ENUMERATED user space controls") Cc: stable@vger.kernel.org Signed-off-by: Ziqing Chen Link: https://patch.msgid.link/20260414132437.261304-1-chenziqing@xiaomi.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/control.c b/sound/core/control.c index 9c3fd5113a617..c714f2e5596b3 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1574,6 +1574,10 @@ static int snd_ctl_elem_init_enum_names(struct user_element *ue) /* check that there are enough valid names */ p = names; for (i = 0; i < ue->info.value.enumerated.items; ++i) { + if (buf_len == 0) { + kvfree(names); + return -EINVAL; + } name_len = strnlen(p, buf_len); if (name_len == 0 || name_len >= 64 || name_len == buf_len) { kvfree(names); From 63d21a3aa0108b9dde4e99b0d3d5d679ac68c0f9 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sat, 11 Apr 2026 23:10:26 +0800 Subject: [PATCH 052/554] net: caif: clear client service pointer on teardown commit f7cf8ece8cee3c1ee361991470cdb1eb65ab02e8 upstream. `caif_connect()` can tear down an existing client after remote shutdown by calling `caif_disconnect_client()` followed by `caif_free_client()`. `caif_free_client()` releases the service layer referenced by `adap_layer->dn`, but leaves that pointer stale. When the socket is later destroyed, `caif_sock_destructor()` calls `caif_free_client()` again and dereferences the freed service pointer. Clear the client/service links before releasing the service object so repeated teardown becomes harmless. 
Fixes: 43e369210108 ("caif: Move refcount from service layer to sock and dev.") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Link: https://patch.msgid.link/9f3d37847c0037568aae698ca23cd47c6691acb0.1775897577.git.zcliangcn@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/caif/cfsrvl.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 171fa32ada85c..d687fd0b4ed3a 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -191,10 +191,20 @@ bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) void caif_free_client(struct cflayer *adap_layer) { + struct cflayer *serv_layer; struct cfsrvl *servl; - if (adap_layer == NULL || adap_layer->dn == NULL) + + if (!adap_layer) + return; + + serv_layer = adap_layer->dn; + if (!serv_layer) return; - servl = container_obj(adap_layer->dn); + + layer_set_dn(adap_layer, NULL); + layer_set_up(serv_layer, NULL); + + servl = container_obj(serv_layer); servl->release(&servl->layer); } EXPORT_SYMBOL(caif_free_client); From 19ca9475f18f991735f98a22e735c43e95e6298d Mon Sep 17 00:00:00 2001 From: Luxiao Xu Date: Sat, 11 Apr 2026 23:10:10 +0800 Subject: [PATCH 053/554] net: strparser: fix skb_head leak in strp_abort_strp() commit fe72340daaf1af588be88056faf98965f39e6032 upstream. When the stream parser is aborted, for example after a message assembly timeout, it can still hold a reference to a partially assembled message in strp->skb_head. That skb is not released in strp_abort_strp(), which leaks the partially assembled message and can be triggered repeatedly to exhaust memory. Fix this by freeing strp->skb_head and resetting the parser state in the abort path. 
Leave strp_stop() unchanged so final cleanup still happens in strp_done() after the work and timer have been synchronized. Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Yuan Tan Signed-off-by: Luxiao Xu Signed-off-by: Ren Wei Link: https://patch.msgid.link/ade3857a9404999ce9a1c27ec523efc896072678.1775482694.git.rakukuip@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/strparser/strparser.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index e659fea2da70f..e3c6abe58f444 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -45,6 +45,14 @@ static void strp_abort_strp(struct strparser *strp, int err) strp->stopped = 1; + if (strp->skb_head) { + kfree_skb(strp->skb_head); + strp->skb_head = NULL; + } + + strp->skb_nextp = NULL; + strp->need_bytes = 0; + if (strp->sk) { struct sock *sk = strp->sk; From 26506a30e0e26d612f82a7bf0e395626968a44e6 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Wed, 4 Mar 2026 03:19:34 +0000 Subject: [PATCH 054/554] media: mtk-jpeg: fix use-after-free in release path due to uncancelled work commit 34c519feef3e4fcff1078dc8bdb25fbbbd10303f upstream. The mtk_jpeg_release() function frees the context structure (ctx) without first cancelling any pending or running work in ctx->jpeg_work. This creates a race window where the workqueue callback may still be accessing the context memory after it has been freed. Race condition: CPU 0 (release) CPU 1 (workqueue) ---------------- ------------------ close() mtk_jpeg_release() mtk_jpegenc_worker() ctx = work->data // accessing ctx kfree(ctx) // freed! access ctx // UAF! The work is queued via queue_work() during JPEG encode/decode operations (via mtk_jpeg_device_run). 
If the device is closed while work is pending or running, the work handler will access freed memory. Fix this by calling cancel_work_sync() BEFORE acquiring the mutex. This ordering is critical: if cancel_work_sync() is called after mutex_lock(), and the work handler also tries to acquire the same mutex, it would cause a deadlock. Note: The open error path does NOT need cancel_work_sync() because INIT_WORK() only initializes the work structure - it does not schedule it. Work is only scheduled later during ioctl operations. Fixes: 5fb1c2361e56 ("mtk-jpegenc: add jpeg encode worker interface") Cc: stable@vger.kernel.org Signed-off-by: Fan Wu Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c index 6268d651bdcfd..a2518793c685a 100644 --- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c @@ -1209,6 +1209,7 @@ static int mtk_jpeg_release(struct file *file) struct mtk_jpeg_dev *jpeg = video_drvdata(file); struct mtk_jpeg_ctx *ctx = mtk_jpeg_file_to_ctx(file); + cancel_work_sync(&ctx->jpeg_work); mutex_lock(&jpeg->lock); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); From 4ca4351d269f8ae4276af45167f68846c2606e9a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 16 Feb 2026 08:45:51 +0100 Subject: [PATCH 055/554] crypto: atmel-sha204a - Fix OTP sysfs read and error handling commit 635c3a757a567b2479639237f5f0d4d9439015f1 upstream. Fix otp_show() to read and print all 64 bytes of the OTP zone. Previously, the loop only printed half of the OTP (32 bytes), and partial output was returned on read errors. 
Propagate the actual error from atmel_sha204a_otp_read() instead of producing partial output. Replace sprintf() with sysfs_emit_at(), which is preferred for formatting sysfs output because it provides safer bounds checking. Cc: stable@vger.kernel.org Fixes: 13909a0c8897 ("crypto: atmel-sha204a - provide the otp content") Signed-off-by: Thorsten Blum Reviewed-by: Lothar Rubusch Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-sha204a.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index a12653a658699..8bcb9f3371f01 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "atmel-i2c.h" @@ -120,21 +121,22 @@ static ssize_t otp_show(struct device *dev, { u16 addr; u8 otp[OTP_ZONE_SIZE]; - char *str = buf; struct i2c_client *client = to_i2c_client(dev); - int i; + ssize_t len = 0; + int i, ret; - for (addr = 0; addr < OTP_ZONE_SIZE/4; addr++) { - if (atmel_sha204a_otp_read(client, addr, otp + addr * 4) < 0) { + for (addr = 0; addr < OTP_ZONE_SIZE / 4; addr++) { + ret = atmel_sha204a_otp_read(client, addr, otp + addr * 4); + if (ret < 0) { dev_err(dev, "failed to read otp zone\n"); - break; + return ret; } } - for (i = 0; i < addr*2; i++) - str += sprintf(str, "%02X", otp[i]); - str += sprintf(str, "\n"); - return str - buf; + for (i = 0; i < OTP_ZONE_SIZE; i++) + len += sysfs_emit_at(buf, len, "%02X", otp[i]); + len += sysfs_emit_at(buf, len, "\n"); + return len; } static DEVICE_ATTR_RO(otp); From 65fc57c8b8f0b31be62be291cb1bb01755cec85d Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Thu, 26 Feb 2026 17:41:39 +0900 Subject: [PATCH 056/554] PCI: endpoint: pci-epf-ntb: Remove duplicate resource teardown commit 3446beddba450c8d6f9aca2f028712ac527fead3 upstream. 
epf_ntb_epc_destroy() duplicates the teardown that the caller is supposed to do later. This leads to an oops when .allow_link fails or when .drop_link is performed. Remove the helper. Also drop pci_epc_put(). EPC device refcounting is tied to configfs EPC group lifetime, and pci_epc_put() in the .drop_link path is sufficient. Fixes: 8b821cf76150 ("PCI: endpoint: Add EP function driver to provide NTB functionality") Signed-off-by: Koichiro Den Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260226084142.2226875-3-den@valinux.co.jp Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/functions/pci-epf-ntb.c | 56 +------------------- 1 file changed, 2 insertions(+), 54 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-ntb.c b/drivers/pci/endpoint/functions/pci-epf-ntb.c index e01a98e74d211..7702ebb81d996 100644 --- a/drivers/pci/endpoint/functions/pci-epf-ntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-ntb.c @@ -1494,47 +1494,6 @@ static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb, return ret; } -/** - * epf_ntb_epc_destroy_interface() - Cleanup NTB EPC interface - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 - * @type: PRIMARY interface or SECONDARY interface - * - * Unbind NTB function device from EPC and relinquish reference to pci_epc - * for each of the interface. 
- */ -static void epf_ntb_epc_destroy_interface(struct epf_ntb *ntb, - enum pci_epc_interface_type type) -{ - struct epf_ntb_epc *ntb_epc; - struct pci_epc *epc; - struct pci_epf *epf; - - if (type < 0) - return; - - epf = ntb->epf; - ntb_epc = ntb->epc[type]; - if (!ntb_epc) - return; - epc = ntb_epc->epc; - pci_epc_remove_epf(epc, epf, type); - pci_epc_put(epc); -} - -/** - * epf_ntb_epc_destroy() - Cleanup NTB EPC interface - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 - * - * Wrapper for epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces - */ -static void epf_ntb_epc_destroy(struct epf_ntb *ntb) -{ - enum pci_epc_interface_type type; - - for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) - epf_ntb_epc_destroy_interface(ntb, type); -} - /** * epf_ntb_epc_create_interface() - Create and initialize NTB EPC interface * @ntb: NTB device that facilitates communication between HOST1 and HOST2 @@ -1614,15 +1573,8 @@ static int epf_ntb_epc_create(struct epf_ntb *ntb) ret = epf_ntb_epc_create_interface(ntb, epf->sec_epc, SECONDARY_INTERFACE); - if (ret) { + if (ret) dev_err(dev, "SECONDARY intf: Fail to create NTB EPC\n"); - goto err_epc_create; - } - - return 0; - -err_epc_create: - epf_ntb_epc_destroy_interface(ntb, PRIMARY_INTERFACE); return ret; } @@ -1887,7 +1839,7 @@ static int epf_ntb_bind(struct pci_epf *epf) ret = epf_ntb_init_epc_bar(ntb); if (ret) { dev_err(dev, "Failed to create NTB EPC\n"); - goto err_bar_init; + return ret; } ret = epf_ntb_config_spad_bar_alloc_interface(ntb); @@ -1909,9 +1861,6 @@ static int epf_ntb_bind(struct pci_epf *epf) err_bar_alloc: epf_ntb_config_spad_bar_free(ntb); -err_bar_init: - epf_ntb_epc_destroy(ntb); - return ret; } @@ -1927,7 +1876,6 @@ static void epf_ntb_unbind(struct pci_epf *epf) epf_ntb_epc_cleanup(ntb); epf_ntb_config_spad_bar_free(ntb); - epf_ntb_epc_destroy(ntb); } #define EPF_NTB_R(_name) \ From 127a1fe59c78d89e61637e3141f77b5e01570de1 Mon Sep 17 
00:00:00 2001 From: Rong Zhang Date: Wed, 4 Mar 2026 03:47:56 +0800 Subject: [PATCH 057/554] Revert "ALSA: usb: Increase volume range that triggers a warning" commit 41d78cb724f4b40b7548af420ccfe524b14023bb upstream. UAC uses 2 bytes to store volume values, so the maximum volume range is 0xFFFF (65535, val = -32768/32767/1). The reverted commit bumped the range of triggering the warning to > 65535, effectively making the range check a no-op. It didn't fix anything but covered any potential problems and deviated from the original intention of the range check. This reverts commit 6b971191fcfc9e3c2c0143eea22534f1f48dbb62. Fixes: 6b971191fcfc ("ALSA: usb: Increase volume range that triggers a warning") Cc: stable@vger.kernel.org Signed-off-by: Rong Zhang Acked-by: Arun Raghavan Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260303194805.266158-2-i@rong.moe Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 16eaa9fa317df..0765250f3a56d 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1820,10 +1820,11 @@ static void __build_feature_ctl(struct usb_mixer_interface *mixer, range = (cval->max - cval->min) / cval->res; /* - * There are definitely devices with a range of ~20,000, so let's be - * conservative and allow for a bit more. + * Are there devices with volume range more than 255? I use a bit more + * to be sure. 384 is a resolution magic number found on Logitech + * devices. It will definitively catch all buggy Logitech devices. */ - if (range > 65535) { + if (range > 384) { usb_audio_warn(mixer->chip, "Warning! 
Unlikely big volume range (=%u), cval->res is probably wrong.", range); From dca29d20ab236f0f587bac76a4b7fc86ac172562 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 6 Feb 2026 15:05:29 -0500 Subject: [PATCH 058/554] PCI: epf-mhi: Return 0, not remaining timeout, when eDMA ops complete commit 36bfc3642b19a98f1302aed4437c331df9b481f0 upstream. pci_epf_mhi_edma_read() and pci_epf_mhi_edma_write() start DMA operations and wait for completion with a timeout. On successful completion, they previously returned the remaining timeout, which callers may treat as an error. In particular, mhi_ep_ring_add_element(), which calls pci_epf_mhi_edma_write() via mhi_cntrl->write_sync(), interprets any non-zero return value as failure. Return 0 on success instead of the remaining timeout to prevent mhi_ep_ring_add_element() from treating successful completion as an error. Fixes: 7b99aaaddabb ("PCI: epf-mhi: Add eDMA support") Signed-off-by: Daniel Hodges [mani: changed commit log as per https://lore.kernel.org/linux-pci/20260227191510.GA3904799@bhelgaas] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Krishna Chaitanya Chundru Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260206200529.10784-1-git@danielhodges.dev Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/functions/pci-epf-mhi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c index 6643a88c7a0ce..2f077d0b7957f 100644 --- a/drivers/pci/endpoint/functions/pci-epf-mhi.c +++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c @@ -367,6 +367,8 @@ static int pci_epf_mhi_edma_read(struct mhi_ep_cntrl *mhi_cntrl, dev_err(dev, "DMA transfer timeout\n"); dmaengine_terminate_sync(chan); ret = -ETIMEDOUT; + } else { + ret = 0; } err_unmap: @@ -438,6 +440,8 @@ static int pci_epf_mhi_edma_write(struct mhi_ep_cntrl *mhi_cntrl, dev_err(dev, "DMA transfer timeout\n"); dmaengine_terminate_sync(chan); ret = -ETIMEDOUT; + 
} else { + ret = 0; } err_unmap: From 4d3a4638e9d21dd0e4aa44ee150880486ad282ec Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 8 Mar 2026 20:20:28 +0000 Subject: [PATCH 059/554] lib/ts_kmp: fix integer overflow in pattern length calculation commit 8cdf30813ea8ce881cecc08664144416dbdb3e16 upstream. The ts_kmp algorithm stores its prefix_tbl[] table and pattern in a single allocation sized from the pattern length. If the prefix_tbl[] size calculation wraps, the resulting allocation can be too small and subsequent pattern copies can overflow it. Fix this by rejecting zero-length patterns and by using overflow helpers before calculating the combined allocation size. This fixes a potential heap overflow. The pattern length calculation can wrap during a size_t addition, leading to an undersized allocation. Because the textsearch library is reachable from userspace via Netfilter's xt_string module, this is a security risk that should be backported to LTS kernels. Link: https://lkml.kernel.org/r/20260308202028.2889285-2-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/ts_kmp.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 5520dc28255a8..29466c1803c91 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -94,8 +94,22 @@ static struct ts_config *kmp_init(const void *pattern, unsigned int len, struct ts_config *conf; struct ts_kmp *kmp; int i; - unsigned int prefix_tbl_len = len * sizeof(unsigned int); - size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; + unsigned int prefix_tbl_len; + size_t priv_size; + + /* Zero-length patterns would make kmp_find() read beyond kmp->pattern. */ + if (unlikely(!len)) + return ERR_PTR(-EINVAL); + + /* + * kmp->pattern is stored immediately after the prefix_tbl[] table. + * Reject lengths that would wrap while sizing either region. 
+ */ + if (unlikely(check_mul_overflow(len, sizeof(*kmp->prefix_tbl), + &prefix_tbl_len) || + check_add_overflow(sizeof(*kmp), (size_t)len, &priv_size) || + check_add_overflow(priv_size, prefix_tbl_len, &priv_size))) + return ERR_PTR(-EINVAL); conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) From da47f8a5958ce8e7a8d5d3eb774fa41d7117b2d5 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 4 Feb 2026 10:48:59 +0800 Subject: [PATCH 060/554] media: i2c: imx219: Check return value of devm_gpiod_get_optional() in imx219_probe() commit 943b1f27a3eead21b22e2531a5432ea5910b60eb upstream. The devm_gpiod_get_optional() function may return an error pointer (ERR_PTR) in case of a genuine failure during GPIO acquisition, not just NULL which indicates the legitimate absence of an optional GPIO. Add an IS_ERR() check after the function call to catch such errors and propagate them to the probe function, ensuring the driver fails to load safely rather than proceeding with an invalid pointer. Fixes: 1283b3b8f82b ("media: i2c: Add driver for Sony IMX219 sensor") Cc: stable@vger.kernel.org Signed-off-by: Chen Ni Reviewed-by: Dave Stevenson Reviewed-by: Jai Luthra Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/imx219.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 300935b1ef249..4734c7b4d37b9 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -1211,6 +1211,9 @@ static int imx219_probe(struct i2c_client *client) /* Request optional enable pin */ imx219->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(imx219->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(imx219->reset_gpio), + "failed to get reset gpio\n"); /* * The sensor must be powered for imx219_identify_module() From 2e127ceb1c415e246076d8e09e23e443a7a2038f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 
Apr 2026 23:04:16 +0530 Subject: [PATCH 061/554] net: qrtr: ns: Fix use-after-free in driver remove() commit 7809fea20c9404bfcfa6112ec08d1fe1d3520beb upstream. In the remove callback, if a packet arrives after destroy_workqueue() is called, but before sock_release(), the qrtr_ns_data_ready() callback will try to queue the work, causing use-after-free issue. Fix this issue by saving the default 'sk_data_ready' callback during qrtr_ns_init() and use it to replace the qrtr_ns_data_ready() callback at the start of remove(). This ensures that even if a packet arrives after destroy_workqueue(), the work struct will not be dereferenced. Note that it is also required to ensure that the RX threads are completed before destroying the workqueue, because the threads could be using the qrtr_ns_data_ready() callback. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-5-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 3de9350cbf307..e5b3dac7836b1 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -24,6 +24,7 @@ static struct { struct list_head lookups; struct workqueue_struct *workqueue; struct work_struct work; + void (*saved_data_ready)(struct sock *sk); int local_node; } qrtr_ns; @@ -709,6 +710,7 @@ int qrtr_ns_init(void) goto err_sock; } + qrtr_ns.saved_data_ready = qrtr_ns.sock->sk->sk_data_ready; qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; sq.sq_port = QRTR_PORT_CTRL; @@ -749,6 +751,10 @@ int qrtr_ns_init(void) return 0; err_wq: + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + destroy_workqueue(qrtr_ns.workqueue); err_sock: 
sock_release(qrtr_ns.sock); @@ -758,7 +764,12 @@ EXPORT_SYMBOL_GPL(qrtr_ns_init); void qrtr_ns_remove(void) { + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + cancel_work_sync(&qrtr_ns.work); + synchronize_net(); destroy_workqueue(qrtr_ns.workqueue); /* sock_release() expects the two references that were put during From 2dde6377ab2e46bb80cf066c659ef016f3ad7a9b Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 4 Apr 2026 18:20:11 +0300 Subject: [PATCH 062/554] ext2: reject inodes with zero i_nlink and valid mode in ext2_iget() commit 25947cc5b2374cd5bf627fe3141496444260d04f upstream. ext2_iget() already rejects inodes with i_nlink == 0 when i_mode is zero or i_dtime is set, treating them as deleted. However, the case of i_nlink == 0 with a non-zero mode and zero dtime slips through. Since ext2 has no orphan list, such a combination can only result from filesystem corruption - a legitimate inode deletion always sets either i_dtime or clears i_mode before freeing the inode. 
A crafted image can exploit this gap to present such an inode to the VFS, which then triggers WARN_ON inside drop_nlink() (fs/inode.c) via ext2_unlink(), ext2_rename() and ext2_rmdir(): WARNING: CPU: 3 PID: 609 at fs/inode.c:336 drop_nlink+0xad/0xd0 fs/inode.c:336 CPU: 3 UID: 0 PID: 609 Comm: syz-executor Not tainted 6.12.77+ #1 Call Trace: inode_dec_link_count include/linux/fs.h:2518 [inline] ext2_unlink+0x26c/0x300 fs/ext2/namei.c:295 vfs_unlink+0x2fc/0x9b0 fs/namei.c:4477 do_unlinkat+0x53e/0x730 fs/namei.c:4541 __x64_sys_unlink+0xc6/0x110 fs/namei.c:4587 do_syscall_64+0xf5/0x220 arch/x86/entry/common.c:78 entry_SYSCALL_64_after_hwframe+0x77/0x7f WARNING: CPU: 0 PID: 646 at fs/inode.c:336 drop_nlink+0xad/0xd0 fs/inode.c:336 CPU: 0 UID: 0 PID: 646 Comm: syz.0.17 Not tainted 6.12.77+ #1 Call Trace: inode_dec_link_count include/linux/fs.h:2518 [inline] ext2_rename+0x35e/0x850 fs/ext2/namei.c:374 vfs_rename+0xf2f/0x2060 fs/namei.c:5021 do_renameat2+0xbe2/0xd50 fs/namei.c:5178 __x64_sys_rename+0x7e/0xa0 fs/namei.c:5223 do_syscall_64+0xf5/0x220 arch/x86/entry/common.c:78 entry_SYSCALL_64_after_hwframe+0x77/0x7f WARNING: CPU: 0 PID: 634 at fs/inode.c:336 drop_nlink+0xad/0xd0 fs/inode.c:336 CPU: 0 UID: 0 PID: 634 Comm: syz-executor Not tainted 6.12.77+ #1 Call Trace: inode_dec_link_count include/linux/fs.h:2518 [inline] ext2_rmdir+0xca/0x110 fs/ext2/namei.c:311 vfs_rmdir+0x204/0x690 fs/namei.c:4348 do_rmdir+0x372/0x3e0 fs/namei.c:4407 __x64_sys_unlinkat+0xf0/0x130 fs/namei.c:4577 do_syscall_64+0xf5/0x220 arch/x86/entry/common.c:78 entry_SYSCALL_64_after_hwframe+0x77/0x7f Extend the existing i_nlink == 0 check to also catch this case, reporting the corruption via ext2_error() and returning -EFSCORRUPTED. This rejects the inode at load time and prevents it from reaching any of the namei.c paths. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Link: https://patch.msgid.link/20260404152011.2590197-1-kovalev@altlinux.org Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/inode.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e10c376843d77..4aac18942eccd 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1430,9 +1430,17 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) * the test is that same one that e2fsck uses * NeilBrown 1999oct15 */ - if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { - /* this inode is deleted */ - ret = -ESTALE; + if (inode->i_nlink == 0) { + if (inode->i_mode == 0 || ei->i_dtime) { + /* this inode is deleted */ + ret = -ESTALE; + } else { + ext2_error(sb, __func__, + "inode %lu has zero i_nlink with mode 0%o and no dtime, " + "filesystem may be corrupt", + ino, inode->i_mode); + ret = -EFSCORRUPTED; + } goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); From 0ae7d28dda5c0978d7c0a5e8e9975ba6627a8ef2 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 21 Mar 2026 22:29:11 +0900 Subject: [PATCH 063/554] mm/zsmalloc: copy KMSAN metadata in zs_page_migrate() commit 4fb61d95ad21c3b6f1c09f357ff49d70abb0535e upstream. zs_page_migrate() uses copy_page() to copy the contents of a zspage page during migration. However, copy_page() is not instrumented by KMSAN, so the shadow and origin metadata of the destination page are not updated. As a result, subsequent accesses to the migrated page are reported as use-after-free by KMSAN, despite the data being correctly copied. Add a kmsan_copy_page_meta() call after copy_page() to propagate the KMSAN metadata to the new page, matching what copy_highpage() does internally. 
Link: https://lkml.kernel.org/r/20260321132912.93434-1-syoshida@redhat.com Fixes: afb2d666d025 ("zsmalloc: use copy_page for full page copy") Signed-off-by: Shigeru Yoshida Reviewed-by: Sergey Senozhatsky Cc: Mark-PK Tsai Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5bf832f9c05c9..bb1f1e124dc07 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1708,6 +1708,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, */ d_addr = kmap_local_zpdesc(newzpdesc); copy_page(d_addr, s_addr); + kmsan_copy_page_meta(zpdesc_page(newzpdesc), zpdesc_page(zpdesc)); kunmap_local(d_addr); for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE; From e79427ee11a868071e39b46b4afa5e950a0a7cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Tue, 31 Mar 2026 18:14:04 -0300 Subject: [PATCH 064/554] ALSA: aoa: i2sbus: clear stale prepared state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5ed060d5491597490fb53ec69da3edc4b1e8c165 upstream. The i2sbus PCM code uses pi->active to constrain the sibling stream to an already prepared duplex format and rate in i2sbus_pcm_open(). That state is set from i2sbus_pcm_prepare(), but the current code only clears it on close. As a result, the sibling stream can inherit stale constraints after the prepared state has been torn down. Clear pi->active when hw_params() or hw_free() tears down the prepared state, and set it again only after prepare succeeds. Replace the stale FIXME in the duplex constraint comment with a description of the current driver behavior: i2sbus still programs a single shared transport configuration for both directions, so mixed formats are not supported in duplex mode. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604010125.AvkWBYKI-lkp@intel.com/ Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260331-aoa-i2sbus-clear-stale-active-v2-1-3764ae2889a1@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/aoa/soundbus/i2sbus/pcm.c | 55 ++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/sound/aoa/soundbus/i2sbus/pcm.c b/sound/aoa/soundbus/i2sbus/pcm.c index 4c480ad2c05dc..3b9930f8c8d84 100644 --- a/sound/aoa/soundbus/i2sbus/pcm.c +++ b/sound/aoa/soundbus/i2sbus/pcm.c @@ -165,17 +165,16 @@ static int i2sbus_pcm_open(struct i2sbus_dev *i2sdev, int in) * currently in use (if any). */ hw->rate_min = 5512; hw->rate_max = 192000; - /* if the other stream is active, then we can only - * support what it is currently using. - * FIXME: I lied. This comment is wrong. We can support - * anything that works with the same serial format, ie. - * when recording 24 bit sound we can well play 16 bit - * sound at the same time iff using the same transfer mode. + /* If the other stream is already prepared, keep this stream + * on the same duplex format and rate. + * + * i2sbus_pcm_prepare() still programs one shared transport + * configuration for both directions, so mixed duplex formats + * are not supported here. */ if (other->active) { - /* FIXME: is this guaranteed by the alsa api? */ hw->formats &= pcm_format_to_bits(i2sdev->format); - /* see above, restrict rates to the one we already have */ + /* Restrict rates to the one already in use. 
*/ hw->rate_min = i2sdev->rate; hw->rate_max = i2sdev->rate; } @@ -283,6 +282,23 @@ void i2sbus_wait_for_stop_both(struct i2sbus_dev *i2sdev) } #endif +static void i2sbus_pcm_clear_active(struct i2sbus_dev *i2sdev, int in) +{ + struct pcm_info *pi; + + guard(mutex)(&i2sdev->lock); + + get_pcm_info(i2sdev, in, &pi, NULL); + pi->active = 0; +} + +static inline int i2sbus_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, int in) +{ + i2sbus_pcm_clear_active(snd_pcm_substream_chip(substream), in); + return 0; +} + static inline int i2sbus_hw_free(struct snd_pcm_substream *substream, int in) { struct i2sbus_dev *i2sdev = snd_pcm_substream_chip(substream); @@ -291,14 +307,27 @@ static inline int i2sbus_hw_free(struct snd_pcm_substream *substream, int in) get_pcm_info(i2sdev, in, &pi, NULL); if (pi->dbdma_ring.stopping) i2sbus_wait_for_stop(i2sdev, pi); + i2sbus_pcm_clear_active(i2sdev, in); return 0; } +static int i2sbus_playback_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + return i2sbus_hw_params(substream, params, 0); +} + static int i2sbus_playback_hw_free(struct snd_pcm_substream *substream) { return i2sbus_hw_free(substream, 0); } +static int i2sbus_record_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + return i2sbus_hw_params(substream, params, 1); +} + static int i2sbus_record_hw_free(struct snd_pcm_substream *substream) { return i2sbus_hw_free(substream, 1); @@ -335,7 +364,6 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) return -EINVAL; runtime = pi->substream->runtime; - pi->active = 1; if (other->active && ((i2sdev->format != runtime->format) || (i2sdev->rate != runtime->rate))) @@ -450,9 +478,11 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) /* early exit if already programmed correctly */ /* not locking these is fine since we touch them only in this function */ - if 
(in_le32(&i2sdev->intfregs->serial_format) == sfr - && in_le32(&i2sdev->intfregs->data_word_sizes) == dws) + if (in_le32(&i2sdev->intfregs->serial_format) == sfr && + in_le32(&i2sdev->intfregs->data_word_sizes) == dws) { + pi->active = 1; return 0; + } /* let's notify the codecs about clocks going away. * For now we only do mastering on the i2s cell... */ @@ -490,6 +520,7 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) if (cii->codec->switch_clock) cii->codec->switch_clock(cii, CLOCK_SWITCH_SLAVE); + pi->active = 1; return 0; } @@ -734,6 +765,7 @@ static snd_pcm_uframes_t i2sbus_playback_pointer(struct snd_pcm_substream static const struct snd_pcm_ops i2sbus_playback_ops = { .open = i2sbus_playback_open, .close = i2sbus_playback_close, + .hw_params = i2sbus_playback_hw_params, .hw_free = i2sbus_playback_hw_free, .prepare = i2sbus_playback_prepare, .trigger = i2sbus_playback_trigger, @@ -802,6 +834,7 @@ static snd_pcm_uframes_t i2sbus_record_pointer(struct snd_pcm_substream static const struct snd_pcm_ops i2sbus_record_ops = { .open = i2sbus_record_open, .close = i2sbus_record_close, + .hw_params = i2sbus_record_hw_params, .hw_free = i2sbus_record_hw_free, .prepare = i2sbus_record_prepare, .trigger = i2sbus_record_trigger, From df462e82e82c8d438e3fcd7964014a13894b1942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Mon, 30 Mar 2026 01:00:34 -0300 Subject: [PATCH 065/554] ALSA: aoa: i2sbus: fix OF node lifetime handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ec93f070eda6b765b62efcaed9241c3b3b0b6ad upstream. i2sbus_add_dev() keeps the matched "sound" child pointer after for_each_child_of_node() has dropped the iterator reference. Take an extra reference before saving that node and drop it after the layout-id/device-id lookup is complete. The function also stores np in dev->sound.ofdev.dev.of_node without taking a reference for the embedded soundbus device. 
Since i2sbus overrides the embedded platform device release callback, balance that reference explicitly in the local error path and in i2sbus_release_dev(). Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260330-aoa-i2sbus-ofnode-lifetime-v1-1-51c309f4ff06@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/aoa/soundbus/i2sbus/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index f4d43c854bbdf..2f80dfc1a34dd 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -84,6 +84,7 @@ static void i2sbus_release_dev(struct device *dev) for (i = aoa_resource_i2smmio; i <= aoa_resource_rxdbdma; i++) free_irq(i2sdev->interrupts[i], i2sdev); i2sbus_control_remove_dev(i2sdev->control, i2sdev); + of_node_put(i2sdev->sound.ofdev.dev.of_node); mutex_destroy(&i2sdev->lock); kfree(i2sdev); } @@ -147,7 +148,6 @@ static int i2sbus_get_and_fixup_rsrc(struct device_node *np, int index, } /* Returns 1 if added, 0 for otherwise; don't return a negative value! */ -/* FIXME: look at device node refcounting */ static int i2sbus_add_dev(struct macio_dev *macio, struct i2sbus_control *control, struct device_node *np) @@ -178,8 +178,9 @@ static int i2sbus_add_dev(struct macio_dev *macio, i = 0; for_each_child_of_node(np, child) { if (of_node_name_eq(child, "sound")) { + of_node_put(sound); i++; - sound = child; + sound = of_node_get(child); } } if (i == 1) { @@ -205,6 +206,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, } } } + of_node_put(sound); /* for the time being, until we can handle non-layout-id * things in some fabric, refuse to attach if there is no * layout-id property or we haven't been forced to attach. 
@@ -219,7 +221,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, mutex_init(&dev->lock); spin_lock_init(&dev->low_lock); dev->sound.ofdev.archdata.dma_mask = macio->ofdev.archdata.dma_mask; - dev->sound.ofdev.dev.of_node = np; + dev->sound.ofdev.dev.of_node = of_node_get(np); dev->sound.ofdev.dev.dma_mask = &dev->sound.ofdev.archdata.dma_mask; dev->sound.ofdev.dev.parent = &macio->ofdev.dev; dev->sound.ofdev.dev.release = i2sbus_release_dev; @@ -327,6 +329,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, for (i=0;i<3;i++) release_and_free_resource(dev->allocated_resource[i]); mutex_destroy(&dev->lock); + of_node_put(dev->sound.ofdev.dev.of_node); kfree(dev); return 0; } From 8d3124290a03d5c6b45b77d7e60832c99194c0ae Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 10 Mar 2026 11:29:20 +0100 Subject: [PATCH 066/554] ALSA: aoa: Skip devices with no codecs in i2sbus_resume() commit fd7df93013c5118812e63a52635dc6c3a805a1de upstream. In i2sbus_resume(), skip devices with an empty codec list, which avoids using an uninitialized 'sysclock_factor' in the 32-bit format path in i2sbus_pcm_prepare(). In i2sbus_pcm_prepare(), replace two list_for_each_entry() loops with a single list_first_entry() now that the codec list is guaranteed to be non-empty by all callers. 
Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260310102921.210109-3-thorsten.blum@linux.dev Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/aoa/soundbus/i2sbus/core.c | 3 +++ sound/aoa/soundbus/i2sbus/pcm.c | 16 +++++----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index 2f80dfc1a34dd..b2afb45aa437f 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -408,6 +408,9 @@ static int i2sbus_resume(struct macio_dev* dev) int err, ret = 0; list_for_each_entry(i2sdev, &control->list, item) { + if (list_empty(&i2sdev->sound.codec_list)) + continue; + /* reset i2s bus format etc. */ i2sbus_pcm_prepare_both(i2sdev); diff --git a/sound/aoa/soundbus/i2sbus/pcm.c b/sound/aoa/soundbus/i2sbus/pcm.c index 3b9930f8c8d84..a2a66045d2447 100644 --- a/sound/aoa/soundbus/i2sbus/pcm.c +++ b/sound/aoa/soundbus/i2sbus/pcm.c @@ -411,6 +411,9 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) /* set stop command */ command->command = cpu_to_le16(DBDMA_STOP); + cii = list_first_entry(&i2sdev->sound.codec_list, + struct codec_info_item, list); + /* ok, let's set the serial format and stuff */ switch (runtime->format) { /* 16 bit formats */ @@ -418,13 +421,7 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) case SNDRV_PCM_FORMAT_U16_BE: /* FIXME: if we add different bus factors we need to * do more here!! 
*/ - bi.bus_factor = 0; - list_for_each_entry(cii, &i2sdev->sound.codec_list, list) { - bi.bus_factor = cii->codec->bus_factor; - break; - } - if (!bi.bus_factor) - return -ENODEV; + bi.bus_factor = cii->codec->bus_factor; input_16bit = 1; break; case SNDRV_PCM_FORMAT_S32_BE: @@ -438,10 +435,7 @@ static int i2sbus_pcm_prepare(struct i2sbus_dev *i2sdev, int in) return -EINVAL; } /* we assume all sysclocks are the same! */ - list_for_each_entry(cii, &i2sdev->sound.codec_list, list) { - bi.sysclock_factor = cii->codec->sysclock_factor; - break; - } + bi.sysclock_factor = cii->codec->sysclock_factor; if (clock_and_divisors(bi.sysclock_factor, bi.bus_factor, From 09496158f6ebba8830593f8972035c02f97124c1 Mon Sep 17 00:00:00 2001 From: Harin Lee Date: Mon, 6 Apr 2026 16:49:13 +0900 Subject: [PATCH 067/554] ALSA: ctxfi: Add fallback to default RSR for S/PDIF commit 7d61662197ecdc458e33e475b6ada7f6da61d364 upstream. spdif_passthru_playback_get_resources() uses atc->pll_rate as the RSR for the MSR calculation loop. However, pll_rate is only updated in atc_pll_init() and not in hw_pll_init(), so it remains 0 after the card init. When spdif_passthru_playback_setup() skips atc_pll_init() for 32000 Hz, (rsr * desc.msr) always becomes 0, causing the loop to spin indefinitely. Add fallback to use atc->rsr when atc->pll_rate is 0. This reflects the hardware state, since hw_card_init() already configures the PLL to the default RSR. 
Fixes: 8cc72361481f ("ALSA: SB X-Fi driver merge") Cc: stable@vger.kernel.org Signed-off-by: Harin Lee Link: https://patch.msgid.link/20260406074913.217374-1-me@harin.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ctxfi/ctatc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index 14779b383d9ea..d65c7dd738069 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -788,7 +788,8 @@ static int spdif_passthru_playback_get_resources(struct ct_atc *atc, struct src *src; int err; int n_amixer = apcm->substream->runtime->channels, i; - unsigned int pitch, rsr = atc->pll_rate; + unsigned int pitch; + unsigned int rsr = atc->pll_rate ? atc->pll_rate : atc->rsr; /* first release old resources */ atc_pcm_release_resources(atc, apcm); From 33074b1e6c18fad4d9e3504178bdf08d64a1c512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Tue, 24 Mar 2026 16:59:41 -0300 Subject: [PATCH 068/554] ALSA: seq_oss: return full count for successful SEQ_FULLSIZE writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bbc6c0dda54fc0ad8f8aed0b796c23e186e1a188 upstream. snd_seq_oss_write() currently returns the raw load_patch() callback result for SEQ_FULLSIZE events. That callback is documented as returning 0 on success and -errno on failure, but snd_seq_oss_write() is the file write path and should report the number of user bytes consumed on success. Some in-tree backends also return backend-specific positive values, which can still be shorter than the original write size. Return the full byte count for successful SEQ_FULLSIZE writes. Preserve negative errors and convert any nonnegative completion to the original count. 
Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260324-alsa-seq-oss-fullsize-write-return-v1-1-66d448510538@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss_rw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_rw.c b/sound/core/seq/oss/seq_oss_rw.c index 8a142fd54a190..307ef98c44c7b 100644 --- a/sound/core/seq/oss/seq_oss_rw.c +++ b/sound/core/seq/oss/seq_oss_rw.c @@ -101,9 +101,9 @@ snd_seq_oss_write(struct seq_oss_devinfo *dp, const char __user *buf, int count, break; } fmt = (*(unsigned short *)rec.c) & 0xffff; - /* FIXME the return value isn't correct */ - return snd_seq_oss_synth_load_patch(dp, rec.s.dev, - fmt, buf, 0, count); + err = snd_seq_oss_synth_load_patch(dp, rec.s.dev, + fmt, buf, 0, count); + return err < 0 ? err : count; } if (ev_is_long(&rec)) { /* extended code */ From 8ebb951a284b7446e025afc7dc5e9516ef9a7214 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 21 Apr 2026 15:59:52 +0800 Subject: [PATCH 069/554] erofs: fix the out-of-bounds nameoff handling for trailing dirents commit d18a3b5d337fa412a38e776e6b4b857a58836575 upstream. Currently we already have boundary-checks for nameoffs, but the trailing dirents are special since the namelens are calculated with strnlen() with unchecked nameoffs. If a crafted EROFS has a trailing dirent with nameoff >= maxsize, maxsize - nameoff can underflow, causing strnlen() to read past the directory block. nameoff0 should also be verified to be a multiple of `sizeof(struct erofs_dirent)` as well [1]. 
[1] https://sashiko.dev/#/patchset/20260416063511.3173774-1-hsiangkao%40linux.alibaba.com Fixes: 3aa8ec716e52 ("staging: erofs: add directory operations") Fixes: 33bac912840f ("staging: erofs: keep corrupted fs from crashing kernel in erofs_readdir()") Reported-by: Yuhao Jiang Reported-by: Junrui Luo Closes: https://lore.kernel.org/r/A0FD7E0F-7558-49B0-8BC8-EB1ECDB2479A@outlook.com Cc: stable@vger.kernel.org Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- fs/erofs/dir.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 32b4f5aa60c98..2b388775be167 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -18,20 +18,18 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - /* the last dirent in the block? */ - if (de + 1 >= end) - de_namelen = strnlen(de_name, maxsize - nameoff); - else + /* non-trailing dirent in the directory block? 
*/ + if (de + 1 < end) de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; + else if (maxsize <= nameoff) + goto err_bogus; + else + de_namelen = strnlen(de_name, maxsize - nameoff); - /* a corrupted entry is found */ - if (nameoff + de_namelen > maxsize || - de_namelen > EROFS_NAME_LEN) { - erofs_err(dir->i_sb, "bogus dirent @ nid %llu", - EROFS_I(dir)->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } + /* a corrupted entry is found (including negative namelen) */ + if (!in_range32(de_namelen, 1, EROFS_NAME_LEN) || + nameoff + de_namelen > maxsize) + goto err_bogus; if (!dir_emit(ctx, de_name, de_namelen, erofs_nid_to_ino64(EROFS_SB(dir->i_sb), @@ -41,6 +39,10 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, ctx->pos += sizeof(struct erofs_dirent); } return 0; +err_bogus: + erofs_err(dir->i_sb, "bogus dirent @ nid %llu", EROFS_I(dir)->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } static int erofs_readdir(struct file *f, struct dir_context *ctx) @@ -87,7 +89,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) } nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || nameoff >= bsz) { + if (!nameoff || nameoff >= bsz || (nameoff % sizeof(*de))) { erofs_err(sb, "invalid de[0].nameoff %u @ nid %llu", nameoff, EROFS_I(dir)->nid); err = -EFSCORRUPTED; From 858bc8b9edb6eaf0522900128bb9053e2df6b0f6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 13 Apr 2026 08:00:23 -0500 Subject: [PATCH 070/554] ipmi:ssif: Clean up kthread on errors commit 75c486cb1bcaa1a3ec3a6438498176a3a4998ae4 upstream. If an error occurs after the ssif kthread is created, but before the main IPMI code starts the ssif interface, the ssif kthread will not be stopped. So make sure the kthread is stopped on an error condition if it is running. 
Fixes: 259307074bfc ("ipmi: Add SMBus interface driver (SSIF)") Reported-by: Li Xiao <252270051@hdu.edu.cn> Cc: stable@vger.kernel.org Reviewed-by: Li Xiao <252270051@hdu.edu.cn> Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 1b63f7d2fcda5..039d5d26b5de2 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1270,8 +1270,10 @@ static void shutdown_ssif(void *send_info) ssif_info->stopping = true; timer_delete_sync(&ssif_info->watch_timer); timer_delete_sync(&ssif_info->retry_timer); - if (ssif_info->thread) + if (ssif_info->thread) { kthread_stop(ssif_info->thread); + ssif_info->thread = NULL; + } } static void ssif_remove(struct i2c_client *client) @@ -1918,6 +1920,15 @@ static int ssif_probe(struct i2c_client *client) out: if (rv) { + /* + * If ipmi_register_smi() starts the interface, it will + * call shutdown and that will free the thread and set + * it to NULL. Otherwise it must be freed here. + */ + if (ssif_info->thread) { + kthread_stop(ssif_info->thread); + ssif_info->thread = NULL; + } if (addr_info) addr_info->client = NULL; From 2b2fee890250ab647a601124471a334bb01a0790 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 9 Apr 2026 19:42:03 +0800 Subject: [PATCH 071/554] jbd2: fix deadlock in jbd2_journal_cancel_revoke() commit 981fcc5674e67158d24d23e841523eccba19d0e7 upstream. Commit f76d4c28a46a ("fs/jbd2: use sleeping version of __find_get_block()") changed jbd2_journal_cancel_revoke() to use __find_get_block_nonatomic() which holds the folio lock instead of i_private_lock. 
This breaks the lock ordering (folio -> buffer) and causes an ABBA deadlock when the filesystem blocksize < pagesize: T1 T2 ext4_mkdir() ext4_init_new_dir() ext4_append() ext4_getblk() lock_buffer() <- A sync_blockdev() blkdev_writepages() writeback_iter() writeback_get_folio() folio_lock() <- B ext4_journal_get_create_access() jbd2_journal_cancel_revoke() __find_get_block_nonatomic() folio_lock() <- B block_write_full_folio() lock_buffer() <- A This can occasionally cause generic/013 to hang. Fix by only calling __find_get_block_nonatomic() when the passed buffer_head doesn't belong to the bdev, which is the only case that we need to look up its bdev alias. Otherwise, the lookup is redundant since the found buffer_head is equal to the one we passed in. Fixes: f76d4c28a46a ("fs/jbd2: use sleeping version of __find_get_block()") Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20260409114204.917154-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/revoke.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 1467f6790747d..c3a18061ce11b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -428,6 +428,7 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) journal_t *journal = handle->h_transaction->t_journal; int need_cancel; struct buffer_head *bh = jh2bh(jh); + struct address_space *bh_mapping = bh->b_folio->mapping; jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); @@ -464,13 +465,14 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * buffer_head? If so, we'd better make sure we clear the * revoked status on any hashed alias too, otherwise the revoke * state machine will get very upset later on. 
*/ - if (need_cancel) { + if (need_cancel && !sb_is_blkdev_sb(bh_mapping->host->i_sb)) { struct buffer_head *bh2; + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, bh->b_size); if (bh2) { - if (bh2 != bh) - clear_buffer_revoked(bh2); + WARN_ON_ONCE(bh2 == bh); + clear_buffer_revoked(bh2); __brelse(bh2); } } From bdf33ba450afe2cde36f3e03fb5a5928da18dc34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Feb 2026 18:38:41 +0800 Subject: [PATCH 072/554] KVM: selftests: Fix reserved value WRMSR testcase for multi-feature MSRs commit 9396cc1e282a280bcba2e932e03994e0aada4cd8 upstream. When determining whether or not a WRMSR with reserved bits will #GP or succeed due to the WRMSR not existing per the guest virtual CPU model, expect failure if and only if _all_ features associated with the MSR are unsupported. Checking only the primary feature results in false failures when running on AMD and Hygon CPUs with only one of RDPID or RDTSCP, as AMD/Hygon CPUs ignore MSR_TSC_AUX[63:32], i.e. don't treat the bits as reserved, and so #GP only if the MSR is unsupported. Fixes: 9c38ddb3df94 ("KVM: selftests: Add an MSR test to exercise guest/host and read/write") Reported-by: Zhiquan Li Closes: https://lore.kernel.org/all/20260209041305.64906-6-zhiquan_li@163.com Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260212103841.171459-5-zhiquan_li@163.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/kvm/x86/msrs_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c index 40d918aedce67..ebd900e713c18 100644 --- a/tools/testing/selftests/kvm/x86/msrs_test.c +++ b/tools/testing/selftests/kvm/x86/msrs_test.c @@ -175,7 +175,7 @@ void guest_test_reserved_val(const struct kvm_msr *msr) * If the CPU will truncate the written value (e.g. 
SYSENTER on AMD), * expect success and a truncated value, not #GP. */ - if (!this_cpu_has(msr->feature) || + if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) || msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); From cac2106bb9a2180b288079b49ed626414fb5bc45 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Mon, 2 Mar 2026 19:56:19 -0500 Subject: [PATCH 073/554] md/raid10: fix deadlock with check operation and nowait requests commit 7d96f3120a7fb7210d21b520c5b6f495da6ba436 upstream. When an array check is running it will raise the barrier at which point normal requests will become blocked and increment the nr_pending value to signal there is work pending inside of wait_barrier(). NOWAIT requests do not block and so will return immediately with an error, and additionally do not increment nr_pending in wait_barrier(). Upstream change commit 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit this condition. raid_end_bio_io() eventually calls allow_barrier() and it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even though the corresponding increment on nr_pending didn't happen in the NOWAIT case. This can be easily seen by starting a check operation while an application is doing nowait IO on the same array. This results in a deadlocked state due to nr_pending value underflowing and so the md resync thread gets stuck waiting for nr_pending to == 0. 
Output of r10conf state of the array when we hit this condition: crash> struct r10conf barrier = 1, nr_pending = { counter = -41 }, nr_waiting = 15, nr_queued = 0, Example of md_sync thread stuck waiting on raise_barrier() and other requests stuck in wait_barrier(): md1_resync [<0>] raise_barrier+0xce/0x1c0 [<0>] raid10_sync_request+0x1ca/0x1ed0 [<0>] md_do_sync+0x779/0x1110 [<0>] md_thread+0x90/0x160 [<0>] kthread+0xbe/0xf0 [<0>] ret_from_fork+0x34/0x50 [<0>] ret_from_fork_asm+0x1a/0x30 kworker/u1040:2+flush-253:4 [<0>] wait_barrier+0x1de/0x220 [<0>] regular_request_wait+0x30/0x180 [<0>] raid10_make_request+0x261/0x1000 [<0>] md_handle_request+0x13b/0x230 [<0>] __submit_bio+0x107/0x1f0 [<0>] submit_bio_noacct_nocheck+0x16f/0x390 [<0>] ext4_io_submit+0x24/0x40 [<0>] ext4_do_writepages+0x254/0xc80 [<0>] ext4_writepages+0x84/0x120 [<0>] do_writepages+0x7a/0x260 [<0>] __writeback_single_inode+0x3d/0x300 [<0>] writeback_sb_inodes+0x1dd/0x470 [<0>] __writeback_inodes_wb+0x4c/0xe0 [<0>] wb_writeback+0x18b/0x2d0 [<0>] wb_workfn+0x2a1/0x400 [<0>] process_one_work+0x149/0x330 [<0>] worker_thread+0x2d2/0x410 [<0>] kthread+0xbe/0xf0 [<0>] ret_from_fork+0x34/0x50 [<0>] ret_from_fork_asm+0x1a/0x30 Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") Cc: stable@vger.kernel.org Signed-off-by: Josh Hunt Link: https://lore.kernel.org/linux-raid/20260303005619.1352958-1-johunt@akamai.com Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d58ae150b4502..706037d2a87c4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1184,7 +1184,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) { - raid_end_bio_io(r10_bio); + free_r10bio(r10_bio); return; } @@ -1372,7 +1372,7 @@ static void raid10_write_request(struct mddev *mddev, 
struct bio *bio, sectors = r10_bio->sectors; if (!regular_request_wait(mddev, conf, bio, sectors)) { - raid_end_bio_io(r10_bio); + free_r10bio(r10_bio); return; } From 377f5ad257fb32767b7435a9d4f1db64c9832adb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Apr 2026 13:41:38 -0600 Subject: [PATCH 074/554] io_uring/register: fix ring resizing with mixed/large SQEs/CQEs Commit 45cd95763e198d74d369ede43aef0b1955b8dea4 upstream. The ring resizing only properly handles "normal" sized SQEs or CQEs, if there are pending entries around a resize. This normally should not be the case, but the code is supposed to handle this regardless. For the mixed SQE/CQE cases, the current copying works fine as they are indexed in the same way. Each half is just copied separately. But for fixed large SQEs and CQEs, the iteration and copy need to take that into account. Cc: stable@kernel.org Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/register.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/io_uring/register.c b/io_uring/register.c index faa44dd32cd5a..6c4ef485212d9 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -514,10 +514,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (tail - old_head > p.sq_entries) goto overflow; for (i = old_head; i < tail; i++) { - unsigned src_head = i & (ctx->sq_entries - 1); - unsigned dst_head = i & (p.sq_entries - 1); - - n.sq_sqes[dst_head] = o.sq_sqes[src_head]; + unsigned index, dst_mask, src_mask; + size_t sq_size; + + index = i; + sq_size = sizeof(struct io_uring_sqe); + src_mask = ctx->sq_entries - 1; + dst_mask = p.sq_entries - 1; + if (ctx->flags & IORING_SETUP_SQE128) { + index <<= 1; + sq_size <<= 1; + src_mask = (ctx->sq_entries << 1) - 1; + dst_mask = (p.sq_entries << 1) - 1; + } + 
memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size); } WRITE_ONCE(n.rings->sq.head, old_head); WRITE_ONCE(n.rings->sq.tail, tail); @@ -534,10 +544,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) goto out; } for (i = old_head; i < tail; i++) { - unsigned src_head = i & (ctx->cq_entries - 1); - unsigned dst_head = i & (p.cq_entries - 1); - - n.rings->cqes[dst_head] = o.rings->cqes[src_head]; + unsigned index, dst_mask, src_mask; + size_t cq_size; + + index = i; + cq_size = sizeof(struct io_uring_cqe); + src_mask = ctx->cq_entries - 1; + dst_mask = p.cq_entries - 1; + if (ctx->flags & IORING_SETUP_CQE32) { + index <<= 1; + cq_size <<= 1; + src_mask = (ctx->cq_entries << 1) - 1; + dst_mask = (p.cq_entries << 1) - 1; + } + memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size); } WRITE_ONCE(n.rings->cq.head, old_head); WRITE_ONCE(n.rings->cq.tail, tail); From 3c52d09469770c57dffad55677f2ade3e35596fa Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 22 Jan 2026 12:13:21 +0100 Subject: [PATCH 075/554] mfd: stpmic1: Attempt system shutdown twice in case PMIC is confused commit ffdc5c51f8bcd0e5e8255ca275a0a3b958475d99 upstream. Attempt to shut down again, in case the first attempt failed. The STPMIC1 might get confused and the first regmap_update_bits() returns with -ETIMEDOUT / -110 . If that or similar transient failure occurs, try to shut down again. If the second attempt fails, there is some bigger problem, report it to user. 
Cc: stable@vger.kernel.org Fixes: 6e9df38f359a ("mfd: stpmic1: Add PMIC poweroff via sys-off handler") Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260122111423.62591-1-marex@nabladev.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/stpmic1.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c index 081827bc05961..7c677b0344c60 100644 --- a/drivers/mfd/stpmic1.c +++ b/drivers/mfd/stpmic1.c @@ -16,6 +16,8 @@ #include +#define STPMIC1_MAX_RETRIES 2 + #define STPMIC1_MAIN_IRQ 0 static const struct regmap_range stpmic1_readable_ranges[] = { @@ -121,9 +123,23 @@ static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { static int stpmic1_power_off(struct sys_off_data *data) { struct stpmic1 *ddata = data->cb_data; + int ret; + + /* + * Attempt to shut down again, in case the first attempt failed. + * The STPMIC1 might get confused and the first regmap_update_bits() + * returns with -ETIMEDOUT / -110 . If that or similar transient + * failure occurs, try to shut down again. If the second attempt + * fails, there is some bigger problem, report it to user. + */ + for (int retries = 0; retries < STPMIC1_MAX_RETRIES; retries++) { + ret = regmap_update_bits(ddata->regmap, MAIN_CR, SOFTWARE_SWITCH_OFF, + SOFTWARE_SWITCH_OFF); + if (!ret) + return NOTIFY_DONE; + } - regmap_update_bits(ddata->regmap, MAIN_CR, - SOFTWARE_SWITCH_OFF, SOFTWARE_SWITCH_OFF); + dev_err(ddata->dev, "Failed to access PMIC I2C bus (%d)\n", ret); return NOTIFY_DONE; } From d5b495ba9de0423ef39f8bd86729a885870c7efe Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Tue, 31 Mar 2026 16:13:12 +0800 Subject: [PATCH 076/554] mm/alloc_tag: clear codetag for pages allocated before page_ext initialization commit 6b1842775a460245e97d36d3a67d0cfba7c4ff79 upstream. Due to initialization ordering, page_ext is allocated and initialized relatively late during boot. 
Some pages have already been allocated and freed before page_ext becomes available, leaving their codetag uninitialized. A clear example is in init_section_page_ext(): alloc_page_ext() calls kmemleak_alloc(). If the slab cache has no free objects, it falls back to the buddy allocator to allocate memory. However, at this point page_ext is not yet fully initialized, so these newly allocated pages have no codetag set. These pages may later be reclaimed by KASAN, which causes the warning to trigger when they are freed because their codetag ref is still empty. Use a global array to track pages allocated before page_ext is fully initialized. The array size is fixed at 8192 entries, and will emit a warning if this limit is exceeded. When page_ext initialization completes, set their codetag to empty to avoid warnings when they are freed later. This warning is only observed with CONFIG_MEM_ALLOC_PROFILING_DEBUG=Y and mem_profiling_compressed disabled: [ 9.582133] ------------[ cut here ]------------ [ 9.582137] alloc_tag was not set [ 9.582139] WARNING: ./include/linux/alloc_tag.h:164 at __pgalloc_tag_sub+0x40f/0x550, CPU#5: systemd/1 [ 9.582190] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 7.0.0-rc4 #1 PREEMPT(lazy) [ 9.582192] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 9.582194] RIP: 0010:__pgalloc_tag_sub+0x40f/0x550 [ 9.582196] Code: 00 00 4c 29 e5 48 8b 05 1f 88 56 05 48 8d 4c ad 00 48 8d 2c c8 e9 87 fd ff ff 0f 0b 0f 0b e9 f3 fe ff ff 48 8d 3d 61 2f ed 03 <67> 48 0f b9 3a e9 b3 fd ff ff 0f 0b eb e4 e8 5e cd 14 02 4c 89 c7 [ 9.582197] RSP: 0018:ffffc9000001f940 EFLAGS: 00010246 [ 9.582200] RAX: dffffc0000000000 RBX: 1ffff92000003f2b RCX: 1ffff110200d806c [ 9.582201] RDX: ffff8881006c0360 RSI: 0000000000000004 RDI: ffffffff9bc7b460 [ 9.582202] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff3a62324 [ 9.582203] R10: ffffffff9d311923 R11: 0000000000000000 R12: ffffea0004001b00 [ 9.582204] R13: 0000000000002000 
R14: ffffea0000000000 R15: ffff8881006c0360 [ 9.582206] FS: 00007ffbbcf2d940(0000) GS:ffff888450479000(0000) knlGS:0000000000000000 [ 9.582208] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9.582210] CR2: 000055ee3aa260d0 CR3: 0000000148b67005 CR4: 0000000000770ef0 [ 9.582211] PKRU: 55555554 [ 9.582212] Call Trace: [ 9.582213] [ 9.582214] ? __pfx___pgalloc_tag_sub+0x10/0x10 [ 9.582216] ? check_bytes_and_report+0x68/0x140 [ 9.582219] __free_frozen_pages+0x2e4/0x1150 [ 9.582221] ? __free_slab+0xc2/0x2b0 [ 9.582224] qlist_free_all+0x4c/0xf0 [ 9.582227] kasan_quarantine_reduce+0x15d/0x180 [ 9.582229] __kasan_slab_alloc+0x69/0x90 [ 9.582232] kmem_cache_alloc_noprof+0x14a/0x500 [ 9.582234] do_getname+0x96/0x310 [ 9.582237] do_readlinkat+0x91/0x2f0 [ 9.582239] ? __pfx_do_readlinkat+0x10/0x10 [ 9.582240] ? get_random_bytes_user+0x1df/0x2c0 [ 9.582244] __x64_sys_readlinkat+0x96/0x100 [ 9.582246] do_syscall_64+0xce/0x650 [ 9.582250] ? __x64_sys_getrandom+0x13a/0x1e0 [ 9.582252] ? __pfx___x64_sys_getrandom+0x10/0x10 [ 9.582254] ? do_syscall_64+0x114/0x650 [ 9.582255] ? ksys_read+0xfc/0x1d0 [ 9.582258] ? __pfx_ksys_read+0x10/0x10 [ 9.582260] ? do_syscall_64+0x114/0x650 [ 9.582262] ? do_syscall_64+0x114/0x650 [ 9.582264] ? __pfx_fput_close_sync+0x10/0x10 [ 9.582266] ? file_close_fd_locked+0x178/0x2a0 [ 9.582268] ? __x64_sys_faccessat2+0x96/0x100 [ 9.582269] ? __x64_sys_close+0x7d/0xd0 [ 9.582271] ? do_syscall_64+0x114/0x650 [ 9.582273] ? do_syscall_64+0x114/0x650 [ 9.582275] ? clear_bhb_loop+0x50/0xa0 [ 9.582277] ? 
clear_bhb_loop+0x50/0xa0 [ 9.582279] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 9.582280] RIP: 0033:0x7ffbbda345ee [ 9.582282] Code: 0f 1f 40 00 48 8b 15 29 38 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff c3 0f 1f 40 00 f3 0f 1e fa 49 89 ca b8 0b 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fa 37 0d 00 f7 d8 64 89 01 48 [ 9.582284] RSP: 002b:00007ffe2ad8de58 EFLAGS: 00000202 ORIG_RAX: 000000000000010b [ 9.582286] RAX: ffffffffffffffda RBX: 000055ee3aa25570 RCX: 00007ffbbda345ee [ 9.582287] RDX: 000055ee3aa25570 RSI: 00007ffe2ad8dee0 RDI: 00000000ffffff9c [ 9.582288] RBP: 0000000000001000 R08: 0000000000000003 R09: 0000000000001001 [ 9.582289] R10: 0000000000001000 R11: 0000000000000202 R12: 0000000000000033 [ 9.582290] R13: 00007ffe2ad8dee0 R14: 00000000ffffff9c R15: 00007ffe2ad8deb0 [ 9.582292] [ 9.582293] ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/20260331081312.123719-1-hao.ge@linux.dev Fixes: dcfe378c81f72 ("lib: introduce support for page allocation tagging") Signed-off-by: Hao Ge Suggested-by: Suren Baghdasaryan Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/alloc_tag.h | 2 + include/linux/pgalloc_tag.h | 2 +- lib/alloc_tag.c | 109 ++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 10 +++- 4 files changed, 121 insertions(+), 2 deletions(-) diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index d40ac39bfbe8d..02de2ede560f3 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -163,9 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) { WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n"); } +void alloc_tag_add_early_pfn(unsigned long pfn); #else static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {} static inline void alloc_tag_sub_check(union codetag_ref *ref) {} +static inline void alloc_tag_add_early_pfn(unsigned long pfn) {} #endif /* Caller 
should verify both ref and tag to be valid */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 38a82d65e58e9..951d333622685 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -181,7 +181,7 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) if (get_page_tag_ref(page, &ref, &handle)) { alloc_tag_sub_check(&ref); - if (ref.ct) + if (ref.ct && !is_codetag_empty(&ref)) tag = ct_to_alloc_tag(ref.ct); put_page_tag_ref(handle); } diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index f26456988445b..df6ba06a8e4a8 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -6,7 +6,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -757,8 +759,115 @@ static __init bool need_page_alloc_tagging(void) return mem_profiling_support; } +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG +/* + * Track page allocations before page_ext is initialized. + * Some pages are allocated before page_ext becomes available, leaving + * their codetag uninitialized. Track these early PFNs so we can clear + * their codetag refs later to avoid warnings when they are freed. + * + * Early allocations include: + * - Base allocations independent of CPU count + * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init, + * such as trace ring buffers, scheduler per-cpu data) + * + * For simplicity, we fix the size to 8192. + * If insufficient, a warning will be triggered to alert the user. + * + * TODO: Replace fixed-size array with dynamic allocation using + * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion. 
+ */ +#define EARLY_ALLOC_PFN_MAX 8192 + +static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata; +static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0); + +static void __init __alloc_tag_add_early_pfn(unsigned long pfn) +{ + int old_idx, new_idx; + + do { + old_idx = atomic_read(&early_pfn_count); + if (old_idx >= EARLY_ALLOC_PFN_MAX) { + pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n", + EARLY_ALLOC_PFN_MAX); + return; + } + new_idx = old_idx + 1; + } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx)); + + early_pfns[old_idx] = pfn; +} + +typedef void alloc_tag_add_func(unsigned long pfn); +static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata = + RCU_INITIALIZER(__alloc_tag_add_early_pfn); + +void alloc_tag_add_early_pfn(unsigned long pfn) +{ + alloc_tag_add_func *alloc_tag_add; + + if (static_key_enabled(&mem_profiling_compressed)) + return; + + rcu_read_lock(); + alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr); + if (alloc_tag_add) + alloc_tag_add(pfn); + rcu_read_unlock(); +} + +static void __init clear_early_alloc_pfn_tag_refs(void) +{ + unsigned int i; + + if (static_key_enabled(&mem_profiling_compressed)) + return; + + rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL); + /* Make sure we are not racing with __alloc_tag_add_early_pfn() */ + synchronize_rcu(); + + for (i = 0; i < atomic_read(&early_pfn_count); i++) { + unsigned long pfn = early_pfns[i]; + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + /* + * An early-allocated page could be freed and reallocated + * after its page_ext is initialized but before we clear it. + * In that case, it already has a valid tag set. + * We should not overwrite that valid tag with CODETAG_EMPTY. 
+ * + * Note: there is still a small race window between checking + * ref.ct and calling set_codetag_empty(). We accept this + * race as it's unlikely and the extra complexity of atomic + * cmpxchg is not worth it for this debug-only code path. + */ + if (ref.ct) { + put_page_tag_ref(handle); + continue; + } + + set_codetag_empty(&ref); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } + } + + } +} +#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */ +static inline void __init clear_early_alloc_pfn_tag_refs(void) {} +#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + static __init void init_page_alloc_tagging(void) { + clear_early_alloc_pfn_tag_refs(); } struct page_ext_operations page_alloc_tagging_ops = { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6288c7e4b971b..43363fd3b8ea4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1289,10 +1289,18 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task, union pgtag_ref_handle handle; union codetag_ref ref; - if (get_page_tag_ref(page, &ref, &handle)) { + if (likely(get_page_tag_ref(page, &ref, &handle))) { alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); update_page_tag_ref(handle, &ref); put_page_tag_ref(handle); + } else { + /* + * page_ext is not available yet, record the pfn so we can + * clear the tag ref later when page_ext is initialized. + */ + alloc_tag_add_early_pfn(page_to_pfn(page)); + if (task->alloc_tag) + alloc_tag_set_inaccurate(task->alloc_tag); } } From 2691332ad88b57179c38653e2cd613d5820a52cf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 27 Mar 2026 16:33:14 -0700 Subject: [PATCH 077/554] mm/damon/core: fix damon_call() vs kdamond_fn() exit race commit 55da81663b9642dd046b26dd6f1baddbcf337c1e upstream. Patch series "mm/damon/core: fix damon_call()/damos_walk() vs kdmond exit race". damon_call() and damos_walk() can leak memory and/or deadlock when they race with kdamond terminations. Fix those. 
This patch (of 2): When kdamond_fn() main loop is finished, the function cancels all remaining damon_call() requests and unset the damon_ctx->kdamond so that API callers and API functions themselves can know the context is terminated. damon_call() adds the caller's request to the queue first. After that, it shows if the kdamond of the damon_ctx is still running (damon_ctx->kdamond is set). Only if the kdamond is running, damon_call() starts waiting for the kdamond's handling of the newly added request. The damon_call() requests registration and damon_ctx->kdamond unset are protected by different mutexes, though. Hence, damon_call() could race with damon_ctx->kdamond unset, and result in deadlocks. For example, let's suppose kdamond successfully finished the damon_call() requests cancelling. Right after that, damon_call() is called for the context. It registers the new request, and shows the context is still running, because damon_ctx->kdamond unset is not yet done. Hence the damon_call() caller starts waiting for the handling of the request. However, the kdamond is already on the termination steps, so it never handles the new request. As a result, the damon_call() caller thread waits infinitely. Fix this by introducing another damon_ctx field, namely call_controls_obsolete. It is protected by the damon_ctx->call_controls_lock, which protects damon_call() requests registration. Initialize (unset) it in kdamond_fn() before letting damon_start() returns and set it just before the cancelling of remaining damon_call() requests is executed. damon_call() reads the obsolete field under the lock and avoids adding a new request. After this change, only requests that are guaranteed to be handled or cancelled are registered. Hence the after-registration DAMON context termination check is no longer needed. Remove it together. Note that the deadlock will not happen when damon_call() is called for repeat mode request. 
In this case, damon_call() returns instead of waiting for the handling when the request registration succeeds and it shows the kdamond is running. However, if the request also has dealloc_on_cancel, the request memory would be leaked. The issue is found by sashiko [1]. Link: https://lore.kernel.org/20260327233319.3528-1-sj@kernel.org Link: https://lore.kernel.org/20260327233319.3528-2-sj@kernel.org Link: https://lore.kernel.org/20260325141956.87144-1-sj@kernel.org [1] Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()") Signed-off-by: SeongJae Park Cc: # 6.14.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/damon.h | 1 + mm/damon/core.c | 45 ++++++++++++++----------------------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 1a8a79d7e4e8d..6fe6f7fcf83d8 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -781,6 +781,7 @@ struct damon_ctx { /* lists of &struct damon_call_control */ struct list_head call_controls; + bool call_controls_obsolete; struct mutex call_controls_lock; struct damos_walk_control *walk_control; diff --git a/mm/damon/core.c b/mm/damon/core.c index 87b6c9c2d6471..3fb199a47fa9e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1431,35 +1431,6 @@ bool damon_is_running(struct damon_ctx *ctx) return running; } -/* - * damon_call_handle_inactive_ctx() - handle DAMON call request that added to - * an inactive context. - * @ctx: The inactive DAMON context. - * @control: Control variable of the call request. - * - * This function is called in a case that @control is added to @ctx but @ctx is - * not running (inactive). See if @ctx handled @control or not, and cleanup - * @control if it was not handled. - * - * Returns 0 if @control was handled by @ctx, negative error code otherwise. 
- */ -static int damon_call_handle_inactive_ctx( - struct damon_ctx *ctx, struct damon_call_control *control) -{ - struct damon_call_control *c; - - mutex_lock(&ctx->call_controls_lock); - list_for_each_entry(c, &ctx->call_controls, list) { - if (c == control) { - list_del(&control->list); - mutex_unlock(&ctx->call_controls_lock); - return -EINVAL; - } - } - mutex_unlock(&ctx->call_controls_lock); - return 0; -} - /** * damon_call() - Invoke a given function on DAMON worker thread (kdamond). * @ctx: DAMON context to call the function for. @@ -1477,6 +1448,10 @@ static int damon_call_handle_inactive_ctx( * synchronization. The return value of the function will be saved in * &damon_call_control->return_code. * + * Note that this function should be called only after damon_start() with the + * @ctx has succeeded. Otherwise, this function could fall into an indefinite + * wait. + * * Return: 0 on success, negative error code otherwise. */ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) @@ -1487,10 +1462,12 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) INIT_LIST_HEAD(&control->list); mutex_lock(&ctx->call_controls_lock); + if (ctx->call_controls_obsolete) { + mutex_unlock(&ctx->call_controls_lock); + return -ECANCELED; + } list_add_tail(&control->list, &ctx->call_controls); mutex_unlock(&ctx->call_controls_lock); - if (!damon_is_running(ctx)) - return damon_call_handle_inactive_ctx(ctx, control); if (control->repeat) return 0; wait_for_completion(&control->completion); @@ -2640,6 +2617,9 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + mutex_lock(&ctx->call_controls_lock); + ctx->call_controls_obsolete = false; + mutex_unlock(&ctx->call_controls_lock); complete(&ctx->kdamond_started); kdamond_init_ctx(ctx); @@ -2749,6 +2729,9 @@ static int kdamond_fn(void *data) if (ctx->ops.cleanup) ctx->ops.cleanup(ctx); kfree(ctx->regions_score_histogram); + 
mutex_lock(&ctx->call_controls_lock); + ctx->call_controls_obsolete = true; + mutex_unlock(&ctx->call_controls_lock); kdamond_call(ctx, true); pr_debug("kdamond (%d) finishes\n", current->pid); From 2774bcf714739cc6bb86f8812167bb9fbda70f6a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 9 Apr 2026 12:54:40 +0200 Subject: [PATCH 078/554] mm/hugetlb: fix early boot crash on parameters without '=' separator commit c45b354911d01565156e38d7f6bc07edb51fc34c upstream. If hugepages, hugepagesz, or default_hugepagesz are specified on the kernel command line without the '=' separator, early parameter parsing passes NULL to hugetlb_add_param(), which dereferences it in strlen() and can crash the system during early boot. Reject NULL values in hugetlb_add_param() and return -EINVAL instead. Link: https://lore.kernel.org/20260409105437.108686-4-thorsten.blum@linux.dev Fixes: 5b47c02967ab ("mm/hugetlb: convert cmdline parameters from setup to early") Signed-off-by: Thorsten Blum Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Frank van der Linden Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 13293976e0568..ba563307278db 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4787,6 +4787,9 @@ static __init int hugetlb_add_param(char *s, int (*setup)(char *)) size_t len; char *p; + if (!s) + return -EINVAL; + if (hugetlb_param_index >= HUGE_MAX_CMDLINE_ARGS) return -EINVAL; From d89044889ecd11b0c2f86663597246e9bdd25679 Mon Sep 17 00:00:00 2001 From: James Kim Date: Mon, 9 Mar 2026 15:05:12 +0900 Subject: [PATCH 079/554] mtd: docg3: fix use-after-free in docg3_release() commit ca19808bc6fac7e29420d8508df569b346b3e339 upstream. In docg3_release(), the docg3 pointer is obtained from cascade->floors[0]->priv before the loop that calls doc_release_device() on each floor. 
doc_release_device() frees the docg3 struct via kfree(docg3) at line 1881. After the loop, docg3->cascade->bch dereferences the already-freed pointer. Fix this by accessing cascade->bch directly, which is equivalent since docg3->cascade points back to the same cascade struct, and is already available as a local variable. This also removes the now-unused docg3 local variable. Fixes: c8ae3f744ddc ("lib/bch: Rework a little bit the exported function names") Cc: stable@vger.kernel.org Signed-off-by: James Kim Signed-off-by: Miquel Raynal Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/docg3.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index c93769c233d9a..a46010ea459a3 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -2049,7 +2049,6 @@ static int __init docg3_probe(struct platform_device *pdev) static void docg3_release(struct platform_device *pdev) { struct docg3_cascade *cascade = platform_get_drvdata(pdev); - struct docg3 *docg3 = cascade->floors[0]->priv; int floor; doc_unregister_sysfs(pdev, cascade); @@ -2057,7 +2056,7 @@ static void docg3_release(struct platform_device *pdev) if (cascade->floors[floor]) doc_release_device(cascade->floors[floor]); - bch_free(docg3->cascade->bch); + bch_free(cascade->bch); } #ifdef CONFIG_OF From 8bcc66896e01f93377d7883698cc7d88d2662480 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Fri, 20 Mar 2026 19:22:09 +0000 Subject: [PATCH 080/554] nvme-pci: add NVME_QUIRK_DISABLE_WRITE_ZEROES for Kingston OM3SGP4 commit a8eebf9699d69987cc49cec4e4fdb4111ab32423 upstream. The Kingston OM3SGP42048K2-A00 (PCI ID 2646:502f) firmware has a race condition when processing concurrent write zeroes and DSM (discard) commands, causing spurious "LBA Out of Range" errors and IOMMU page faults at address 0x0. 
The issue is reliably triggered by running two concurrent mkfs commands on different partitions of the same drive, which generates interleaved write zeroes and discard operations. Disable write zeroes for this device, matching the pattern used for other Kingston OM* drives that have similar firmware issues. Cc: stable@vger.kernel.org Signed-off-by: Robert Beckett Assisted-by: claude-opus-4-6-v1 Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a64b4b4a18a19..8875855e45352 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3922,6 +3922,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x2646, 0x502F), /* KINGSTON OM3SGP4xxxxK NVMe SSD */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1f40, 0x1202), /* Netac Technologies Co. NV3000 NVMe SSD */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. NV7000 NVMe SSD */ From c2462d54af3cb83ed843bf369f74ef0312e0feb8 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Fri, 20 Mar 2026 19:22:08 +0000 Subject: [PATCH 081/554] nvme: respect NVME_QUIRK_DISABLE_WRITE_ZEROES when wzsl is set commit 40f0496b617b431f8d2dd94d7f785c1121f8a68a upstream. The NVM Command Set Identify Controller data may report a non-zero Write Zeroes Size Limit (wzsl). When present, nvme_init_non_mdts_limits() unconditionally overrides max_zeroes_sectors from wzsl, even if NVME_QUIRK_DISABLE_WRITE_ZEROES previously set it to zero. This effectively re-enables write zeroes for devices that need it disabled, defeating the quirk. Several Kingston OM* drives rely on this quirk to avoid firmware issues with write zeroes commands. 
Check for the quirk before applying the wzsl override. Fixes: 5befc7c26e5a ("nvme: implement non-mdts command limits") Cc: stable@vger.kernel.org Signed-off-by: Robert Beckett Assisted-by: claude-opus-4-6-v1 Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2ba7244fdaf1c..9a0071bd791c8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3393,7 +3393,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) ctrl->dmrl = id->dmrl; ctrl->dmrsl = le32_to_cpu(id->dmrsl); - if (id->wzsl) + if (id->wzsl && !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); free_data: From e6e3724c7870da9afb0bcb7d239213a2841cf3ba Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Apr 2026 23:56:28 +0200 Subject: [PATCH 082/554] parisc: _llseek syscall is only available for 32-bit userspace commit da3680f564bd787ce974f9931e6e924d908b3b2a upstream. 
Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 88a788a7b18d1..56fdcfa5a03a6 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -154,7 +154,7 @@ # 137 was afs_syscall 138 common setfsuid sys_setfsuid 139 common setfsgid sys_setfsgid -140 common _llseek sys_llseek +140 32 _llseek sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 common _newselect sys_select compat_sys_select 143 common flock sys_flock From 1fe317364cb5bf4893d50880bc903198ab94ce45 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 10 Apr 2026 16:12:31 +0200 Subject: [PATCH 083/554] parisc: Drop ip_fast_csum() inline assembly implementation commit 3dd31a370c1dccb580f729af7c580ccb1ae3c0c9 upstream. The assembly code of ip_fast_csum() triggers unaligned access warnings if the IP header isn't correctly aligned: Kernel: unaligned access to 0x173d22e76 in inet_gro_receive+0xbc/0x2e8 (iir 0x0e8810b6) Kernel: unaligned access to 0x173d22e7e in inet_gro_receive+0xc4/0x2e8 (iir 0x0e88109a) Kernel: unaligned access to 0x173d22e82 in inet_gro_receive+0xc8/0x2e8 (iir 0x0e90109d) Kernel: unaligned access to 0x173d22e7a in inet_gro_receive+0xd0/0x2e8 (iir 0x0e9810b8) Kernel: unaligned access to 0x173d22e86 in inet_gro_receive+0xdc/0x2e8 (iir 0x0e8810b8) We have the option to a) ignore the warnings, b) work around it by adding more code to check for alignment, or c) to switch to the generic implementation and rely on the compiler to optimize the code. Let's go with c), because a) isn't nice, and b) would effectively lead to an implementation which is basically equal to c). 
Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v7.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 3 + arch/parisc/include/asm/checksum.h | 89 +-------------------------- arch/parisc/lib/Makefile | 2 +- arch/parisc/lib/checksum.c | 99 ------------------------------ 4 files changed, 6 insertions(+), 187 deletions(-) delete mode 100644 arch/parisc/lib/checksum.c diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 47fd9662d8005..5625dc9b89076 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -129,6 +129,9 @@ config GENERIC_BUG config GENERIC_BUG_RELATIVE_POINTERS bool +config GENERIC_CSUM + def_bool y + config GENERIC_HWEIGHT bool default y diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index 2aceebcd695c8..382758808726a 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -4,73 +4,7 @@ #include -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -extern __wsum csum_partial(const void *, int, __wsum); - -/* - * Optimized for IP headers, which always checksum on 4 octet boundaries. 
- * - * Written by Randolph Chung , and then mucked with by - * LaMont Jones - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum; - unsigned long t0, t1, t2; - - __asm__ __volatile__ ( -" ldws,ma 4(%1), %0\n" -" addib,<= -4, %2, 2f\n" -"\n" -" ldws 4(%1), %4\n" -" ldws 8(%1), %5\n" -" add %0, %4, %0\n" -" ldws,ma 12(%1), %3\n" -" addc %0, %5, %0\n" -" addc %0, %3, %0\n" -"1: ldws,ma 4(%1), %3\n" -" addib,> -1, %2, 1b\n" -" addc %0, %3, %0\n" -"\n" -" extru %0, 31, 16, %4\n" -" extru %0, 15, 16, %5\n" -" addc %4, %5, %0\n" -" extru %0, 15, 16, %5\n" -" add %0, %5, %0\n" -" subi -1, %0, %0\n" -"2:\n" - : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (t0), "=r" (t1), "=r" (t2) - : "1" (iph), "2" (ihl) - : "memory"); - - return (__force __sum16)sum; -} - -/* - * Fold a partial checksum - */ -static inline __sum16 csum_fold(__wsum csum) -{ - u32 sum = (__force u32)csum; - /* add the swapped two 16-bit halves of sum, - a possible carry from adding the two 16-bit halves, - will carry from the lower half into the upper half, - giving us the correct sum in the upper half. 
*/ - sum += (sum << 16) + (sum >> 16); - return (__force __sum16)(~sum >> 16); -} - +#define csum_tcpudp_nofold csum_tcpudp_nofold static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) @@ -85,26 +19,7 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, return sum; } -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - __u32 len, __u8 proto, - __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -static inline __sum16 ip_compute_csum(const void *buf, int len) -{ - return csum_fold (csum_partial(buf, len, 0)); -} - +#include #define _HAVE_ARCH_IPV6_CSUM static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 7b197667faf6c..d5975d1fb4068 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ +lib-y := lusercopy.o bitops.o io.o memset.o memcpy.o \ ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c deleted file mode 100644 index 59d8c15d81bd0..0000000000000 --- a/arch/parisc/lib/checksum.c +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * MIPS specific IP/TCP/UDP checksumming routines - * - * Authors: Ralf Baechle, - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. 
- */ -#include -#include - -#include -#include -#include -#include - -#define addc(_t,_r) \ - __asm__ __volatile__ ( \ -" add %0, %1, %0\n" \ -" addc %0, %%r0, %0\n" \ - : "=r"(_t) \ - : "r"(_r), "0"(_t)); - -static inline unsigned int do_csum(const unsigned char * buff, int len) -{ - int odd, count; - unsigned int result = 0; - - if (len <= 0) - goto out; - odd = 1 & (unsigned long) buff; - if (odd) { - result = be16_to_cpu(*buff); - len--; - buff++; - } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { - if (2 & (unsigned long) buff) { - result += *(unsigned short *) buff; - count--; - len -= 2; - buff += 2; - } - count >>= 1; /* nr of 32-bit words.. */ - if (count) { - while (count >= 4) { - unsigned int r1, r2, r3, r4; - r1 = *(unsigned int *)(buff + 0); - r2 = *(unsigned int *)(buff + 4); - r3 = *(unsigned int *)(buff + 8); - r4 = *(unsigned int *)(buff + 12); - addc(result, r1); - addc(result, r2); - addc(result, r3); - addc(result, r4); - count -= 4; - buff += 16; - } - while (count) { - unsigned int w = *(unsigned int *) buff; - count--; - buff += 4; - addc(result, w); - } - result = (result & 0xffff) + (result >> 16); - } - if (len & 2) { - result += *(unsigned short *) buff; - buff += 2; - } - } - if (len & 1) - result += le16_to_cpu(*buff); - result = csum_from32to16(result); - if (odd) - result = swab16(result); -out: - return result; -} - -/* - * computes a partial checksum, e.g. for TCP/UDP fragments - */ -/* - * why bother folding? - */ -__wsum csum_partial(const void *buff, int len, __wsum sum) -{ - unsigned int result = do_csum(buff, len); - addc(result, sum); - return (__force __wsum)csum_from32to16(result); -} - -EXPORT_SYMBOL(csum_partial); From dffdcbeabaac077e23caa3155926b09fa6ea451b Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 2 Apr 2026 14:25:45 +0530 Subject: [PATCH 084/554] PCI: cadence: Use cdns_pcie_read_sz() for byte or word read access commit d9cf7154deed71a4f23e81101571c79cdc77be00 upstream. 
The commit 18ac51ae9df9 ("PCI: cadence: Implement capability search using PCI core APIs") assumed all the platforms using Cadence PCIe controller support byte and word register accesses. This is not true for all platforms (e.g., TI J721E SoC, which only supports dword register accesses). This causes capability searches via cdns_pcie_find_capability() to fail on such platforms. Fix this by using cdns_pcie_read_sz() for config read functions, which properly handles size-aligned accesses. Remove the now-unused byte and word read wrapper functions (cdns_pcie_readw and cdns_pcie_readb). Fixes: 18ac51ae9df9 ("PCI: cadence: Implement capability search using PCI core APIs") Signed-off-by: Aksh Garg Signed-off-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260402085545.284457-1-a-garg7@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/cadence/pcie-cadence.h | 56 +++++++++---------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h index e2a853d2c0ab0..23ccad0a31f25 100644 --- a/drivers/pci/controller/cadence/pcie-cadence.h +++ b/drivers/pci/controller/cadence/pcie-cadence.h @@ -362,37 +362,6 @@ static inline u32 cdns_pcie_readl(struct cdns_pcie *pcie, u32 reg) return readl(pcie->reg_base + reg); } -static inline u16 cdns_pcie_readw(struct cdns_pcie *pcie, u32 reg) -{ - return readw(pcie->reg_base + reg); -} - -static inline u8 cdns_pcie_readb(struct cdns_pcie *pcie, u32 reg) -{ - return readb(pcie->reg_base + reg); -} - -static inline int cdns_pcie_read_cfg_byte(struct cdns_pcie *pcie, int where, - u8 *val) -{ - *val = cdns_pcie_readb(pcie, where); - return PCIBIOS_SUCCESSFUL; -} - -static inline int cdns_pcie_read_cfg_word(struct cdns_pcie *pcie, int where, - u16 *val) -{ - *val = cdns_pcie_readw(pcie, where); - return PCIBIOS_SUCCESSFUL; -} - -static inline int cdns_pcie_read_cfg_dword(struct cdns_pcie *pcie, 
int where, - u32 *val) -{ - *val = cdns_pcie_readl(pcie, where); - return PCIBIOS_SUCCESSFUL; -} - static inline u32 cdns_pcie_read_sz(void __iomem *addr, int size) { void __iomem *aligned_addr = PTR_ALIGN_DOWN(addr, 0x4); @@ -433,6 +402,31 @@ static inline void cdns_pcie_write_sz(void __iomem *addr, int size, u32 value) writel(val, aligned_addr); } +static inline int cdns_pcie_read_cfg_byte(struct cdns_pcie *pcie, int where, + u8 *val) +{ + void __iomem *addr = pcie->reg_base + where; + + *val = cdns_pcie_read_sz(addr, 0x1); + return PCIBIOS_SUCCESSFUL; +} + +static inline int cdns_pcie_read_cfg_word(struct cdns_pcie *pcie, int where, + u16 *val) +{ + void __iomem *addr = pcie->reg_base + where; + + *val = cdns_pcie_read_sz(addr, 0x2); + return PCIBIOS_SUCCESSFUL; +} + +static inline int cdns_pcie_read_cfg_dword(struct cdns_pcie *pcie, int where, + u32 *val) +{ + *val = cdns_pcie_readl(pcie, where); + return PCIBIOS_SUCCESSFUL; +} + /* Root Port register access */ static inline void cdns_pcie_rp_writeb(struct cdns_pcie *pcie, u32 reg, u8 value) From 8ba804869382ce307f2a15f5f6f2adfd791f41dc Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 25 Mar 2026 12:40:57 -0700 Subject: [PATCH 085/554] RDMA/mana_ib: Disable RX steering on RSS QP destroy commit dbeb256e8dd87233d891b170c0b32a6466467036 upstream. When an RSS QP is destroyed (e.g. DPDK exit), mana_ib_destroy_qp_rss() destroys the RX WQ objects but does not disable vPort RX steering in firmware. This leaves stale steering configuration that still points to the destroyed RX objects. If traffic continues to arrive (e.g. peer VM is still transmitting) and the VF interface is subsequently brought up (mana_open), the firmware may deliver completions using stale CQ IDs from the old RX objects. 
These CQ IDs can be reused by the ethernet driver for new TX CQs, causing RX completions to land on TX CQs: WARNING: mana_poll_tx_cq+0x1b8/0x220 [mana] (is_sq == false) WARNING: mana_gd_process_eq_events+0x209/0x290 (cq_table lookup fails) Fix this by disabling vPort RX steering before destroying RX WQ objects. Note that mana_fence_rqs() cannot be used here because the fence completion is delivered on the CQ, which is polled by user-mode (e.g. DPDK) and not visible to the kernel driver. Refactor the disable logic into a shared mana_disable_vport_rx() in mana_en, exported for use by mana_ib, replacing the duplicate code. The ethernet driver's mana_dealloc_queues() is also updated to call this common function. Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Cc: stable@vger.kernel.org Signed-off-by: Long Li Link: https://patch.msgid.link/20260325194100.1929056-1-longli@microsoft.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mana/qp.c | 15 +++++++++++++++ drivers/net/ethernet/microsoft/mana/mana_en.c | 11 ++++++++++- include/net/mana/mana.h | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 48c1f4977f218..960878b53da85 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -823,6 +823,21 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port); mpc = netdev_priv(ndev); + /* Disable vPort RX steering before destroying RX WQ objects. + * Otherwise firmware still routes traffic to the destroyed queues, + * which can cause bogus completions on reused CQ IDs when the + * ethernet driver later creates new queues on mana_open(). + * + * Unlike the ethernet teardown path, mana_fence_rqs() cannot be + * used here because the fence completion CQE is delivered on the + * CQ which is polled by userspace (e.g. 
DPDK), so there is no way + * for the kernel to wait for fence completion. + * + * This is best effort — if it fails there is not much we can do, + * and mana_cfg_vport_steering() already logs the error. + */ + mana_disable_vport_rx(mpc); + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { ibwq = ind_tbl->ind_tbl[i]; wq = container_of(ibwq, struct mana_ib_wq, ibwq); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 729133c1e5e4f..abb207339992b 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2766,6 +2766,13 @@ static void mana_rss_table_init(struct mana_port_context *apc) ethtool_rxfh_indir_default(i, apc->num_queues); } +int mana_disable_vport_rx(struct mana_port_context *apc) +{ + return mana_cfg_vport_steering(apc, TRI_STATE_FALSE, false, false, + false); +} +EXPORT_SYMBOL_NS(mana_disable_vport_rx, "NET_MANA"); + int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, bool update_hash, bool update_tab) { @@ -3150,10 +3157,12 @@ static int mana_dealloc_queues(struct net_device *ndev) */ apc->rss_state = TRI_STATE_FALSE; - err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); + err = mana_disable_vport_rx(apc); if (err && mana_en_need_log(apc, err)) netdev_err(ndev, "Failed to disable vPort: %d\n", err); + mana_fence_rqs(apc); + /* Even in err case, still need to cleanup the vPort */ mana_destroy_vport(apc); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 0921485565c05..857f45a3386cc 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -554,6 +554,7 @@ struct mana_port_context { netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, bool update_hash, bool update_tab); +int mana_disable_vport_rx(struct mana_port_context *apc); int mana_alloc_queues(struct net_device *ndev); int 
mana_attach(struct net_device *ndev); From 06d0bed2552fd0dae27d374d4492a2b672e24eed Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Tue, 3 Mar 2026 15:51:27 -0800 Subject: [PATCH 086/554] remoteproc: xlnx: Only access buffer information if IPI is buffered commit 38dd6ccfdfbbe865569a52fe1ba9fa1478f672e6 upstream. In the receive callback check if message is NULL to prevent possibility of crash by NULL pointer dereferencing. Signed-off-by: Ben Levinsky Signed-off-by: Tanmay Shah Fixes: 5dfb28c257b7 ("remoteproc: xilinx: Add mailbox channels for rpmsg") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20260303235127.2317955-3-tanmay.shah@amd.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/xlnx_r5_remoteproc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index 0b7b173d0d260..b30f660a1c553 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -232,17 +232,19 @@ static void zynqmp_r5_mb_rx_cb(struct mbox_client *cl, void *msg) ipi = container_of(cl, struct mbox_info, mbox_cl); - /* copy data from ipi buffer to r5_core */ + /* copy data from ipi buffer to r5_core if IPI is buffered. 
*/ ipi_msg = (struct zynqmp_ipi_message *)msg; - buf_msg = (struct zynqmp_ipi_message *)ipi->rx_mc_buf; - len = ipi_msg->len; - if (len > IPI_BUF_LEN_MAX) { - dev_warn(cl->dev, "msg size exceeded than %d\n", - IPI_BUF_LEN_MAX); - len = IPI_BUF_LEN_MAX; + if (ipi_msg) { + buf_msg = (struct zynqmp_ipi_message *)ipi->rx_mc_buf; + len = ipi_msg->len; + if (len > IPI_BUF_LEN_MAX) { + dev_warn(cl->dev, "msg size exceeded than %d\n", + IPI_BUF_LEN_MAX); + len = IPI_BUF_LEN_MAX; + } + buf_msg->len = len; + memcpy(buf_msg->data, ipi_msg->data, len); } - buf_msg->len = len; - memcpy(buf_msg->data, ipi_msg->data, len); /* received and processed interrupt ack */ if (mbox_send_message(ipi->rx_chan, NULL) < 0) From 961db0b89200a4e1e53e2855f103a6dc99943f0a Mon Sep 17 00:00:00 2001 From: Tommaso Merciai Date: Thu, 12 Mar 2026 15:50:38 +0100 Subject: [PATCH 087/554] reset: rzv2h-usb2phy: Keep PHY clock enabled for entire device lifetime commit 8889b289ce1bd11a5102b9617742a1b93bb4843e upstream. The driver was disabling the USB2 PHY clock immediately after register initialization in probe() and after each reset operation. This left the PHY unclocked even though it must remain active for USB functionality. The behavior appeared to work only when another driver (e.g., USB controller) had already enabled the clock, making operation unreliable and hardware-dependent. In configurations where this driver is the sole clock user, USB functionality would fail. Fix this by: - Enabling the clock once in probe() via pm_runtime_resume_and_get() - Removing all pm_runtime_put() calls from assert/deassert/status - Registering a devm cleanup action to release the clock at removal - Removed rzv2h_usbphy_assert_helper() and its call in rzv2h_usb2phy_reset_probe() This ensures the PHY clock remains enabled for the entire device lifetime, preventing instability and aligning with hardware requirements. 
Cc: stable@vger.kernel.org Fixes: e3911d7f865b ("reset: Add USB2PHY port reset driver for Renesas RZ/V2H(P)") Signed-off-by: Tommaso Merciai Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Signed-off-by: Greg Kroah-Hartman --- drivers/reset/reset-rzv2h-usb2phy.c | 64 ++++++++--------------------- 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/drivers/reset/reset-rzv2h-usb2phy.c b/drivers/reset/reset-rzv2h-usb2phy.c index ae643575b067c..5bdd392746127 100644 --- a/drivers/reset/reset-rzv2h-usb2phy.c +++ b/drivers/reset/reset-rzv2h-usb2phy.c @@ -49,9 +49,10 @@ static inline struct rzv2h_usb2phy_reset_priv return container_of(rcdev, struct rzv2h_usb2phy_reset_priv, rcdev); } -/* This function must be called only after pm_runtime_resume_and_get() has been called */ -static void rzv2h_usbphy_assert_helper(struct rzv2h_usb2phy_reset_priv *priv) +static int rzv2h_usbphy_reset_assert(struct reset_controller_dev *rcdev, + unsigned long id) { + struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); const struct rzv2h_usb2phy_reset_of_data *data = priv->data; scoped_guard(spinlock, &priv->lock) { @@ -60,24 +61,6 @@ static void rzv2h_usbphy_assert_helper(struct rzv2h_usb2phy_reset_priv *priv) } usleep_range(11, 20); -} - -static int rzv2h_usbphy_reset_assert(struct reset_controller_dev *rcdev, - unsigned long id) -{ - struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); - struct device *dev = priv->dev; - int ret; - - ret = pm_runtime_resume_and_get(dev); - if (ret) { - dev_err(dev, "pm_runtime_resume_and_get failed\n"); - return ret; - } - - rzv2h_usbphy_assert_helper(priv); - - pm_runtime_put(dev); return 0; } @@ -87,14 +70,6 @@ static int rzv2h_usbphy_reset_deassert(struct reset_controller_dev *rcdev, { struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); const struct rzv2h_usb2phy_reset_of_data *data = priv->data; - struct device *dev = priv->dev; - int ret; - - ret = 
pm_runtime_resume_and_get(dev); - if (ret) { - dev_err(dev, "pm_runtime_resume_and_get failed\n"); - return ret; - } scoped_guard(spinlock, &priv->lock) { writel(data->reset_deassert_val, priv->base + data->reset_reg); @@ -102,8 +77,6 @@ static int rzv2h_usbphy_reset_deassert(struct reset_controller_dev *rcdev, writel(data->reset_release_val, priv->base + data->reset_reg); } - pm_runtime_put(dev); - return 0; } @@ -111,20 +84,10 @@ static int rzv2h_usbphy_reset_status(struct reset_controller_dev *rcdev, unsigned long id) { struct rzv2h_usb2phy_reset_priv *priv = rzv2h_usbphy_rcdev_to_priv(rcdev); - struct device *dev = priv->dev; - int ret; u32 reg; - ret = pm_runtime_resume_and_get(dev); - if (ret) { - dev_err(dev, "pm_runtime_resume_and_get failed\n"); - return ret; - } - reg = readl(priv->base + priv->data->reset_reg); - pm_runtime_put(dev); - return (reg & priv->data->reset_status_bits) == priv->data->reset_status_bits; } @@ -141,6 +104,11 @@ static int rzv2h_usb2phy_reset_of_xlate(struct reset_controller_dev *rcdev, return 0; } +static void rzv2h_usb2phy_reset_pm_runtime_put(void *data) +{ + pm_runtime_put(data); +} + static int rzv2h_usb2phy_reset_probe(struct platform_device *pdev) { const struct rzv2h_usb2phy_reset_of_data *data; @@ -175,14 +143,14 @@ static int rzv2h_usb2phy_reset_probe(struct platform_device *pdev) if (error) return dev_err_probe(dev, error, "pm_runtime_resume_and_get failed\n"); + error = devm_add_action_or_reset(dev, rzv2h_usb2phy_reset_pm_runtime_put, + dev); + if (error) + return dev_err_probe(dev, error, "unable to register cleanup action\n"); + for (unsigned int i = 0; i < data->init_val_count; i++) writel(data->init_vals[i].val, priv->base + data->init_vals[i].reg); - /* keep usb2phy in asserted state */ - rzv2h_usbphy_assert_helper(priv); - - pm_runtime_put(dev); - priv->rcdev.ops = &rzv2h_usbphy_reset_ops; priv->rcdev.of_reset_n_cells = 0; priv->rcdev.nr_resets = 1; @@ -190,7 +158,11 @@ static int rzv2h_usb2phy_reset_probe(struct 
platform_device *pdev) priv->rcdev.of_node = dev->of_node; priv->rcdev.dev = dev; - return devm_reset_controller_register(dev, &priv->rcdev); + error = devm_reset_controller_register(dev, &priv->rcdev); + if (error) + return dev_err_probe(dev, error, "could not register reset controller\n"); + + return 0; } /* From 558aa1e2cd11741ebc5dd57fd17cb3fd98c5fd64 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Fri, 3 Apr 2026 17:00:14 -0400 Subject: [PATCH 088/554] sched: Use u64 for bandwidth ratio calculations commit c6e80201e057dfb7253385e60bf541121bf5dc33 upstream. to_ratio() computes BW_SHIFT-scaled bandwidth ratios from u64 period and runtime values, but it returns unsigned long. tg_rt_schedulable() also stores the current group limit and the accumulated child sum in unsigned long. On 32-bit builds, large bandwidth ratios can be truncated and the RT group sum can wrap when enough siblings are present. That can let an overcommitted RT hierarchy pass the schedulability check, and it also narrows the helper result for other callers. Return u64 from to_ratio() and use u64 for the RT group totals so bandwidth ratios are preserved and compared at full width on both 32-bit and 64-bit builds. 
Fixes: b40b2e8eb521 ("sched: rt: multi level group constraints") Assisted-by: Codex:GPT-5 Signed-off-by: Joseph Salisbury Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260403210014.2713404-1-joseph.salisbury@oracle.com Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 2 +- kernel/sched/rt.c | 2 +- kernel/sched/sched.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 582c3847f483a..421efba7db5a1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4792,7 +4792,7 @@ void sched_post_fork(struct task_struct *p) scx_post_fork(p); } -unsigned long to_ratio(u64 period, u64 runtime) +u64 to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) return BW_UNIT; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d4d994fb8999a..a892a01c463e5 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2644,7 +2644,7 @@ static int tg_rt_schedulable(struct task_group *tg, void *data) { struct rt_schedulable_data *d = data; struct task_group *child; - unsigned long total, sum = 0; + u64 total, sum = 0; u64 period, runtime; period = ktime_to_ns(tg->rt_bandwidth.rt_period); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ed37ab9209e59..f750dea7b7876 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2702,7 +2702,7 @@ extern void init_cfs_throttle_work(struct task_struct *p); #define MAX_BW_BITS (64 - BW_SHIFT) #define MAX_BW ((1ULL << MAX_BW_BITS) - 1) -extern unsigned long to_ratio(u64 period, u64 runtime); +extern u64 to_ratio(u64 period, u64 runtime); extern void init_entity_runnable_average(struct sched_entity *se); extern void post_init_entity_util_avg(struct task_struct *p); From df865a3bb91b822069a60e139f974f97db94bb3a Mon Sep 17 00:00:00 2001 From: Simon Liebold Date: Thu, 12 Mar 2026 14:02:00 +0000 Subject: [PATCH 089/554] selftests/mqueue: Fix incorrectly named file commit 
64fac99037689020ad97e472ae898e96ea3616dc upstream. Commit 85506aca2eb4 ("selftests/mqueue: Set timeout to 180 seconds") intended to increase the timeout for mq_perf_tests from the default kselftest limit of 45 seconds to 180 seconds. Unfortunately, the file storing this information was incorrectly named `setting` instead of `settings`, causing the kselftest runner not to pick up the limit and keep using the default 45 seconds limit. Fix this by renaming it to `settings` to ensure that the kselftest runner uses the increased timeout of 180 seconds for this test. Fixes: 85506aca2eb4 ("selftests/mqueue: Set timeout to 180 seconds") Cc: # 5.10.y Signed-off-by: Simon Liebold Link: https://lore.kernel.org/r/20260312140200.2224850-1-simonlie@amazon.de Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/mqueue/{setting => settings} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/testing/selftests/mqueue/{setting => settings} (100%) diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/settings similarity index 100% rename from tools/testing/selftests/mqueue/setting rename to tools/testing/selftests/mqueue/settings From 2fcde49092aac55d5beef43fdd3633217672f7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 7 Apr 2026 18:41:04 +0200 Subject: [PATCH 090/554] landlock: Fix LOG_SUBDOMAINS_OFF inheritance across fork() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 874c8f83826c95c62c21d9edfe9ef43e5c346724 upstream. hook_cred_transfer() only copies the Landlock security blob when the source credential has a domain. 
This is inconsistent with landlock_restrict_self() which can set LOG_SUBDOMAINS_OFF on a credential without creating a domain (via the ruleset_fd=-1 path): the field is committed but not preserved across fork() because the child's prepare_creds() calls hook_cred_transfer() which skips the copy when domain is NULL. This breaks the documented use case where a process mutes subdomain logs before forking sandboxed children: the children lose the muting and their domains produce unexpected audit records. Fix this by unconditionally copying the Landlock credential blob. Cc: Günther Noack Cc: Jann Horn Cc: stable@vger.kernel.org Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF") Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260407164107.2012589-1-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- security/landlock/cred.c | 6 +- tools/testing/selftests/landlock/audit_test.c | 88 +++++++++++++++++++ 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/security/landlock/cred.c b/security/landlock/cred.c index 0cb3edde4d18a..cc419de75cd6b 100644 --- a/security/landlock/cred.c +++ b/security/landlock/cred.c @@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new, const struct landlock_cred_security *const old_llcred = landlock_cred(old); - if (old_llcred->domain) { - landlock_get_ruleset(old_llcred->domain); - *landlock_cred(new) = *old_llcred; - } + landlock_get_ruleset(old_llcred->domain); + *landlock_cred(new) = *old_llcred; } static int hook_cred_prepare(struct cred *const new, diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index 46d02d49835aa..20099b8667e75 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -279,6 +279,94 @@ TEST_F(audit, thread) &audit_tv_default, sizeof(audit_tv_default))); } +/* + * Verifies that log_subdomains_off set via the 
ruleset_fd=-1 path (without + * creating a domain) is inherited by children across fork(). This exercises + * the hook_cred_transfer() fix: the Landlock credential blob must be copied + * even when the source credential has no domain. + * + * Phase 1 (baseline): a child without muting creates a domain and triggers a + * denial that IS logged. + * + * Phase 2 (after muting): the parent mutes subdomain logs, forks another child + * who creates a domain and triggers a denial that is NOT logged. + */ +TEST_F(audit, log_subdomains_off_fork) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + struct audit_records records; + int ruleset_fd, status; + pid_t child; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + /* + * Phase 1: forks a child that creates a domain and triggers a denial + * before any muting. This proves the audit path works. + */ + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + ASSERT_EQ(-1, kill(getppid(), 0)); + ASSERT_EQ(EPERM, errno); + _exit(0); + return; + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(true, WIFEXITED(status)); + ASSERT_EQ(0, WEXITSTATUS(status)); + + /* The denial must be logged (baseline). */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(), + NULL)); + + /* Drains any remaining records (e.g. domain allocation). */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + + /* + * Mutes subdomain logs without creating a domain. The parent's + * credential has domain=NULL and log_subdomains_off=1. + */ + ASSERT_EQ(0, landlock_restrict_self( + -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); + + /* + * Phase 2: forks a child that creates a domain and triggers a denial. 
+ * Because log_subdomains_off was inherited via fork(), the child's + * domain has log_status=LANDLOCK_LOG_DISABLED. + */ + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + ASSERT_EQ(-1, kill(getppid(), 0)); + ASSERT_EQ(EPERM, errno); + _exit(0); + return; + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(true, WIFEXITED(status)); + ASSERT_EQ(0, WEXITSTATUS(status)); + + /* No denial record should appear. */ + EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd, + getpid(), NULL)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + + EXPECT_EQ(0, close(ruleset_fd)); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; From a496488e1702df0918472b5559463bdc4b4fc35a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 2 Apr 2026 21:26:04 +0200 Subject: [PATCH 091/554] selftests/landlock: Drain stale audit records on init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3647a4977fb73da385e5a29b9775a4749733470d upstream. Non-audit Landlock tests generate audit records as side effects when audit_enabled is non-zero (e.g. from boot configuration). These records accumulate in the kernel audit backlog while no audit daemon socket is open. When the next test opens a new netlink socket and registers as the audit daemon, the stale backlog is delivered, causing baseline record count checks to fail spuriously. Fix this by draining all pending records in audit_init() right after setting the receive timeout. The 1-usec SO_RCVTIMEO causes audit_recv() to return -EAGAIN once the backlog is empty, naturally terminating the drain loop. Domain deallocation records are emitted asynchronously from a work queue, so they may still arrive after the drain. 
Remove records.domain == 0 checks that are not preceded by audit_match_record() calls, which would otherwise consume stale records before the count. Document this constraint above audit_count_records(). Increasing the drain timeout to catch in-flight deallocation records was considered but rejected: a longer timeout adds latency to every audit_init() call even when no stale record is pending, and any fixed timeout is still not guaranteed to catch all records under load. Removing the unprotected checks is simpler and avoids the spurious failures. Cc: Günther Noack Cc: stable@vger.kernel.org Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260402192608.1458252-4-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/landlock/audit.h | 19 +++++++++++++++++++ tools/testing/selftests/landlock/audit_test.c | 2 -- .../testing/selftests/landlock/ptrace_test.c | 1 - .../landlock/scoped_abstract_unix_test.c | 1 - 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index 02fd1393947a7..b1c48382e63cf 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -338,6 +338,15 @@ struct audit_records { size_t domain; }; +/* + * WARNING: Do not assert records.domain == 0 without a preceding + * audit_match_record() call. Domain deallocation records are emitted + * asynchronously from kworker threads and can arrive after the drain in + * audit_init(), corrupting the domain count. A preceding audit_match_record() + * call consumes stale records while scanning, making the assertion safe in + * practice because stale deallocation records arrive before the expected access + * records. 
+ */ static int audit_count_records(int audit_fd, struct audit_records *records) { struct audit_message msg; @@ -391,6 +400,16 @@ static int audit_init(void) if (err) return -errno; + /* + * Drains stale audit records that accumulated in the kernel backlog + * while no audit daemon socket was open. This happens when non-audit + * Landlock tests generate records while audit_enabled is non-zero (e.g. + * from boot configuration), or when domain deallocation records arrive + * asynchronously after a previous test's socket was closed. + */ + while (audit_recv(fd, NULL) == 0) + ; + return fd; } diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index 20099b8667e75..fa37abdb5ab32 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -500,7 +500,6 @@ TEST_F(audit_flags, signal) } else { EXPECT_EQ(1, records.access); } - EXPECT_EQ(0, records.domain); /* Updates filter rules to match the drop record. */ set_cap(_metadata, CAP_AUDIT_CONTROL); @@ -689,7 +688,6 @@ TEST_F(audit_exec, signal_and_open) /* Tests that there was no denial until now. */ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); EXPECT_EQ(0, records.access); - EXPECT_EQ(0, records.domain); /* * Wait for the child to do a first denied action by layer1 and diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index 4e356334ecb74..84ccdb8f594af 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -486,7 +486,6 @@ TEST_F(audit, trace) /* Makes sure there is no superfluous logged records. 
*/ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); EXPECT_EQ(0, records.access); - EXPECT_EQ(0, records.domain); yama_ptrace_scope = get_yama_ptrace_scope(); ASSERT_LE(0, yama_ptrace_scope); diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c index 6825082c079c0..5a1afc640c770 100644 --- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c +++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c @@ -312,7 +312,6 @@ TEST_F(scoped_audit, connect_to_child) /* Makes sure there is no superfluous logged records. */ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); EXPECT_EQ(0, records.access); - EXPECT_EQ(0, records.domain); ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); From 1e85a1ae1452948dcd680baf0fb3d99479c0ea84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 2 Apr 2026 21:26:06 +0200 Subject: [PATCH 092/554] selftests/landlock: Fix format warning for __u64 in net_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a060ac0b8c3345639f5f4a01e2c435d34adf7e3d upstream. On architectures where __u64 is unsigned long (e.g. powerpc64), using %llx to format a __u64 triggers a -Wformat warning because %llx expects unsigned long long. Cast the argument to unsigned long long. 
Cc: Günther Noack Cc: stable@vger.kernel.org Fixes: a549d055a22e ("selftests/landlock: Add network tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202604020206.62zgOTeP-lkp@intel.com/ Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260402192608.1458252-6-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/landlock/net_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 3bbc0508420b1..957a06fde5924 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -1344,7 +1344,7 @@ TEST_F(mini, network_access_rights) &net_port, 0)) { TH_LOG("Failed to add rule with access 0x%llx: %s", - access, strerror(errno)); + (unsigned long long)access, strerror(errno)); } } EXPECT_EQ(0, close(ruleset_fd)); From 127ae2e73e6175616abee6f58b1ede51276f70f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 2 Apr 2026 21:26:02 +0200 Subject: [PATCH 093/554] selftests/landlock: Fix snprintf truncation checks in audit helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b566f7a4f0e4f15f78f2e5fac273fa954991e03a upstream. snprintf() returns the number of characters that would have been written, excluding the terminating NUL byte. When the output is truncated, this return value equals or exceeds the buffer size. Fix matches_log_domain_allocated() and matches_log_domain_deallocated() to detect truncation with ">=" instead of ">". 
Cc: Günther Noack Cc: stable@vger.kernel.org Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260402192608.1458252-2-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/landlock/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b1c48382e63cf..d9882b7cfa386 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -309,7 +309,7 @@ static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, log_match_len = snprintf(log_match, sizeof(log_match), log_template, pid); - if (log_match_len > sizeof(log_match)) + if (log_match_len >= sizeof(log_match)) return -E2BIG; return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, @@ -326,7 +326,7 @@ static int __maybe_unused matches_log_domain_deallocated( log_match_len = snprintf(log_match, sizeof(log_match), log_template, num_denials); - if (log_match_len > sizeof(log_match)) + if (log_match_len >= sizeof(log_match)) return -E2BIG; return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, From 4d79d0cf65386b345030d525cf9fb183bbf872eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 2 Apr 2026 21:26:05 +0200 Subject: [PATCH 094/554] selftests/landlock: Skip stale records in audit_match_record() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 07c2572a87573b2a2f0fd6b9f538cd1aeef2eee7 upstream. Domain deallocation records are emitted asynchronously from kworker threads (via free_ruleset_work()). 
Stale deallocation records from a previous test can arrive during the current test's deallocation read loop and be picked up by audit_match_record() instead of the expected record, causing a domain ID mismatch. The audit.layers test (which creates 16 nested domains) is particularly vulnerable because it reads 16 deallocation records in sequence, providing a large window for stale records to interleave. The same issue affects audit_flags.signal, where deallocation records from a previous test (audit.layers) can leak into the next test and be picked up by audit_match_record() instead of the expected record. Fix this by continuing to read records when the type matches but the content pattern does not. Stale records are silently consumed, and the loop only stops when both type and pattern match (or the socket times out with -EAGAIN). Additionally, extend matches_log_domain_deallocated() with an expected_domain_id parameter. When set, the regex pattern includes the specific domain ID as a literal hex value, so that deallocation records for a different domain do not match the pattern at all. This handles the case where the stale record has the same denial count as the expected one (e.g. both have denials=1), which the type+pattern loop alone cannot distinguish. Callers that already know the expected domain ID (from a prior denial or allocation record) now pass it to filter precisely. When expected_domain_id is set, matches_log_domain_deallocated() also temporarily increases the socket timeout to audit_tv_dom_drop (1 second) to wait for the asynchronous kworker deallocation, and restores audit_tv_default afterward. This removes the need for callers to manage the timeout switch manually. 
Cc: Günther Noack Cc: stable@vger.kernel.org Fixes: 6a500b22971c ("selftests/landlock: Add tests for audit flags and domain IDs") Link: https://lore.kernel.org/r/20260402192608.1458252-5-mic@digikod.net Signed-off-by: Mickaël Salaün Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/landlock/audit.h | 82 ++++++++++++++----- tools/testing/selftests/landlock/audit_test.c | 34 ++++---- 2 files changed, 77 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index d9882b7cfa386..c0c7e8a66edc1 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -249,9 +249,9 @@ static __maybe_unused char *regex_escape(const char *const src, char *dst, static int audit_match_record(int audit_fd, const __u16 type, const char *const pattern, __u64 *domain_id) { - struct audit_message msg; + struct audit_message msg, last_mismatch = {}; int ret, err = 0; - bool matches_record = !type; + int num_type_match = 0; regmatch_t matches[2]; regex_t regex; @@ -259,21 +259,35 @@ static int audit_match_record(int audit_fd, const __u16 type, if (ret) return -EINVAL; - do { + /* + * Reads records until one matches both the expected type and the + * pattern. Type-matching records with non-matching content are + * silently consumed, which handles stale domain deallocation records + * from a previous test emitted asynchronously by kworker threads. 
+ */ + while (true) { memset(&msg, 0, sizeof(msg)); err = audit_recv(audit_fd, &msg); - if (err) + if (err) { + if (num_type_match) { + printf("DATA: %s\n", last_mismatch.data); + printf("ERROR: %d record(s) matched type %u" + " but not pattern: %s\n", + num_type_match, type, pattern); + } goto out; + } - if (msg.header.nlmsg_type == type) - matches_record = true; - } while (!matches_record); + if (type && msg.header.nlmsg_type != type) + continue; - ret = regexec(®ex, msg.data, ARRAY_SIZE(matches), matches, 0); - if (ret) { - printf("DATA: %s\n", msg.data); - printf("ERROR: no match for pattern: %s\n", pattern); - err = -ENOENT; + ret = regexec(®ex, msg.data, ARRAY_SIZE(matches), matches, + 0); + if (!ret) + break; + + num_type_match++; + last_mismatch = msg; } if (domain_id) { @@ -316,21 +330,49 @@ static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, domain_id); } -static int __maybe_unused matches_log_domain_deallocated( - int audit_fd, unsigned int num_denials, __u64 *domain_id) +/* + * Matches a domain deallocation record. When expected_domain_id is non-zero, + * the pattern includes the specific domain ID so that stale deallocation + * records from a previous test (with a different domain ID) are skipped by + * audit_match_record(), and the socket timeout is temporarily increased to + * audit_tv_dom_drop to wait for the asynchronous kworker deallocation. 
+ */ +static int __maybe_unused +matches_log_domain_deallocated(int audit_fd, unsigned int num_denials, + __u64 expected_domain_id, __u64 *domain_id) { static const char log_template[] = REGEX_LANDLOCK_PREFIX " status=deallocated denials=%u$"; - char log_match[sizeof(log_template) + 10]; - int log_match_len; + static const char log_template_with_id[] = + "^audit([0-9.:]\\+): domain=\\(%llx\\)" + " status=deallocated denials=%u$"; + char log_match[sizeof(log_template_with_id) + 32]; + int log_match_len, err; + + if (expected_domain_id) + log_match_len = snprintf(log_match, sizeof(log_match), + log_template_with_id, + (unsigned long long)expected_domain_id, + num_denials); + else + log_match_len = snprintf(log_match, sizeof(log_match), + log_template, num_denials); - log_match_len = snprintf(log_match, sizeof(log_match), log_template, - num_denials); if (log_match_len >= sizeof(log_match)) return -E2BIG; - return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, - domain_id); + if (expected_domain_id) + setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)); + + err = audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); + + if (expected_domain_id) + setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, + sizeof(audit_tv_default)); + + return err; } struct audit_records { diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index fa37abdb5ab32..08d4c70e015bb 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -139,23 +139,24 @@ TEST_F(audit, layers) WEXITSTATUS(status) != EXIT_SUCCESS) _metadata->exit_code = KSFT_FAIL; - /* Purges log from deallocated domains. */ - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + /* + * Purges log from deallocated domains. 
Records arrive in LIFO order + * (innermost domain first) because landlock_put_hierarchy() walks the + * chain sequentially in a single kworker context. + */ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + (*domain_stack)[i], &deallocated_dom)); EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*domain_stack)[i], i); + (unsigned long long)(*domain_stack)[i], i); } } EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_default, sizeof(audit_tv_default))); EXPECT_EQ(0, close(ruleset_fd)); } @@ -270,13 +271,9 @@ TEST_F(audit, thread) EXPECT_EQ(0, close(pipe_parent[1])); ASSERT_EQ(0, pthread_join(thread, NULL)); - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, - &deallocated_dom)); + EXPECT_EQ(0, matches_log_domain_deallocated( + self->audit_fd, 1, denial_dom, &deallocated_dom)); EXPECT_EQ(denial_dom, deallocated_dom); - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_default, sizeof(audit_tv_default))); } /* @@ -520,22 +517,21 @@ TEST_F(audit_flags, signal) if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) { + /* + * No deallocation record: denials=0 never matches a real + * record. 
+ */ EXPECT_EQ(-EAGAIN, - matches_log_domain_deallocated(self->audit_fd, 0, + matches_log_domain_deallocated(self->audit_fd, 0, 0, &deallocated_dom)); EXPECT_EQ(deallocated_dom, 2); } else { - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_dom_drop, - sizeof(audit_tv_dom_drop))); EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2, + *self->domain_id, &deallocated_dom)); EXPECT_NE(deallocated_dom, 2); EXPECT_NE(deallocated_dom, 0); EXPECT_EQ(deallocated_dom, *self->domain_id); - EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, - &audit_tv_default, - sizeof(audit_tv_default))); } } From ad0126ffcba8777109852979eaaa6dca6703abdb Mon Sep 17 00:00:00 2001 From: Dawei Feng Date: Sun, 19 Apr 2026 17:03:48 +0800 Subject: [PATCH 095/554] rbd: fix null-ptr-deref when device_add_disk() fails commit d1fef92e414433ca7b89abf85cb0df42b8d475eb upstream. do_rbd_add() publishes the device with device_add() before calling device_add_disk(). If device_add_disk() fails after device_add() succeeds, the error path calls rbd_free_disk() directly and then later falls through to rbd_dev_device_release(), which calls rbd_free_disk() again. This double teardown can leave blk-mq cleanup operating on invalid state and trigger a null-ptr-deref in __blk_mq_free_map_and_rqs(), reached from blk_mq_free_tag_set(). Fix this by following the normal remove ordering: call device_del() before rbd_dev_device_release() when device_add_disk() fails after device_add(). That keeps the teardown sequence consistent and avoids re-entering disk cleanup through the wrong path. The bug was first flagged by an experimental analysis tool we are developing for kernel memory-management bugs while analyzing v6.13-rc1. The tool is still under development and is not yet publicly available. We reproduced the bug on v7.0 with a real Ceph backend and a QEMU x86_64 guest booted with KASAN and CONFIG_FAILSLAB enabled. 
The reproducer confines failslab injections to the __add_disk() range and injects fail-nth while mapping an RBD image through /sys/bus/rbd/add_single_major. On the unpatched kernel, fail-nth=4 reliably triggered the fault: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 UID: 0 PID: 273 Comm: bash Not tainted 7.0.0-01247-gd60bc1401583 #6 PREEMPT(lazy) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:__blk_mq_free_map_and_rqs+0x8c/0x240 Code: 00 00 48 8b 6b 60 41 89 f4 49 c1 e4 03 4c 01 e5 45 85 ed 0f 85 0a 01 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 e9 48 c1 e9 03 <80> 3c 01 00 0f 85 31 01 00 00 4c 8b 6d 00 4d 85 ed 0f 84 e2 00 00 RSP: 0018:ff1100000ab0fac8 EFLAGS: 00000246 RAX: dffffc0000000000 RBX: ff1100000c4806a0 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: ff1100000c4806f4 RBP: 0000000000000000 R08: 0000000000000001 R09: ffe21c000189001b R10: ff1100000c4800df R11: ff1100006cf37be0 R12: 0000000000000000 R13: 0000000000000000 R14: ff1100000c480700 R15: ff1100000c480004 FS: 00007f0fbe8fe740(0000) GS:ff110000e5851000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe53473b2e0 CR3: 0000000012eef000 CR4: 00000000007516f0 PKRU: 55555554 Call Trace: blk_mq_free_tag_set+0x77/0x460 do_rbd_add+0x1446/0x2b80 ? __pfx_do_rbd_add+0x10/0x10 ? lock_acquire+0x18c/0x300 ? find_held_lock+0x2b/0x80 ? sysfs_file_kobj+0xb6/0x1b0 ? __pfx_sysfs_kf_write+0x10/0x10 kernfs_fop_write_iter+0x2f4/0x4a0 vfs_write+0x98e/0x1000 ? expand_files+0x51f/0x850 ? __pfx_vfs_write+0x10/0x10 ksys_write+0xf2/0x1d0 ? 
__pfx_ksys_write+0x10/0x10 do_syscall_64+0x115/0x690 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f0fbea15907 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffe22346ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000058 RCX: 00007f0fbea15907 RDX: 0000000000000058 RSI: 0000563ace6c0ef0 RDI: 0000000000000001 RBP: 0000563ace6c0ef0 R08: 0000563ace6c0ef0 R09: 6b6435726d694141 R10: 5250337279762f78 R11: 0000000000000246 R12: 0000000000000058 R13: 00007f0fbeb1c780 R14: ff1100000c480700 R15: ff1100000c480004 With this fix applied, rerunning the reproducer over fail-nth=1..256 yields no KASAN reports. [ idryomov: rename err_out_device_del -> err_out_device ] Cc: stable@vger.kernel.org Fixes: 27c97abc30e2 ("rbd: add add_disk() error handling") Signed-off-by: Zilin Guan Signed-off-by: Dawei Feng Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8f441eb8b1926..9236b5184bce3 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7165,7 +7165,7 @@ static ssize_t do_rbd_add(const char *buf, size_t count) rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); if (rc) - goto err_out_cleanup_disk; + goto err_out_device; spin_lock(&rbd_dev_list_lock); list_add_tail(&rbd_dev->node, &rbd_dev_list); @@ -7179,8 +7179,8 @@ static ssize_t do_rbd_add(const char *buf, size_t count) module_put(THIS_MODULE); return rc; -err_out_cleanup_disk: - rbd_free_disk(rbd_dev); +err_out_device: + device_del(&rbd_dev->dev); err_out_image_lock: rbd_dev_image_unlock(rbd_dev); rbd_dev_device_release(rbd_dev); From 81880f84cb3611914014f90147766a0a435d2144 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: 
Fri, 27 Feb 2026 22:19:45 +0900 Subject: [PATCH 096/554] block: fix zone write plugs refcount handling in disk_zone_wplug_schedule_bio_work() commit 0a8b8af896e0ef83e188e1fe20f98f2bbb1c2459 upstream. The function disk_zone_wplug_schedule_bio_work() always takes a reference on the zone write plug of the BIO work being scheduled. This ensures that the zone write plug cannot be freed while the BIO work is being scheduled but has not run yet. However, this unconditional reference taking is fragile since the reference taken is released by the BIO work blk_zone_wplug_bio_work() function, which implies that there always must be a 1:1 relation between the work being scheduled and the work running. Make sure to drop the reference taken when scheduling the BIO work if the work is already scheduled, that is, when queue_work() returns false. Fixes: 9e78c38ab30b ("block: Hold a reference on zone write plugs to schedule submission") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-zoned.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c812f3bd7df3..e98bd9ad02329 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -801,13 +801,17 @@ static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, struct blk_zone_wplug *zwplug) { /* - * Take a reference on the zone write plug and schedule the submission - * of the next plugged BIO. blk_zone_wplug_bio_work() will release the - * reference we take here. + * Schedule the submission of the next plugged BIO. Taking a reference + * to the zone write plug is required as the bio_work belongs to the + * plug, and thus we must ensure that the write plug does not go away + * while the work is being scheduled but has not run yet. 
+ * blk_zone_wplug_bio_work() will release the reference we take here, + * and we also drop this reference if the work is already scheduled. */ WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); refcount_inc(&zwplug->ref); - queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); + if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work)) + disk_put_zone_wplug(zwplug); } static inline void disk_zone_wplug_add_bio(struct gendisk *disk, From dd154d768db5ebc610dd05dc37c112ae0bcdd1cb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 2 Mar 2026 13:10:34 +0000 Subject: [PATCH 097/554] io_uring/timeout: check unused sqe fields commit 484ae637a3e3d909718de7c07afd3bb34b6b8504 upstream. Zero check unused SQE fields addr3 and pad2 for timeout and timeout update requests. They're not needed now, but could be used sometime in the future. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/timeout.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index cd42316d0f3c0..dec1d13ca4207 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -445,6 +445,8 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len || sqe->splice_fd_in) return -EINVAL; @@ -517,6 +519,8 @@ static int __io_timeout_prep(struct io_kiocb *req, unsigned flags; u32 off = READ_ONCE(sqe->off); + if (sqe->addr3 || sqe->__pad2[0]) + return -EINVAL; if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) return -EINVAL; if (off && is_timeout_link) From 8661370e0865c8670ed367e21c90ca166e7875a7 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 10 Apr 2026 15:34:14 +0000 Subject: [PATCH 098/554] block: relax pgmap check in bio_add_page for compatible zone device pages commit 
41c665aae2b5dbecddddcc8ace344caf630cc7a4 upstream. bio_add_page() and bio_integrity_add_page() reject pages from different dev_pagemaps entirely, returning 0 even when those pages have compatible DMA mapping requirements. This forces callers to start a new bio when buffers span pgmap boundaries, even though the pages could safely coexist as separate bvec entries. This matters for guests where memory is registered through devm_memremap_pages() with MEMORY_DEVICE_GENERIC in multiple calls, creating separate dev_pagemaps for each chunk. When a direct I/O buffer spans two such chunks, bio_add_page() rejects the second page, forcing an unnecessary bio split or I/O failure. Introduce zone_device_pages_compatible() in blk.h to check whether two pages can coexist in the same bio as separate bvec entries. The block DMA iterator (blk_dma_map_iter_start) caches the P2PDMA mapping state from the first segment and applies it to all others, so P2PDMA pages from different pgmaps must not be mixed, and neither must P2PDMA and non-P2PDMA pages. All other combinations (MEMORY_DEVICE_GENERIC pages from different pgmaps, or MEMORY_DEVICE_GENERIC with normal RAM) use the same dma_map_phys path and are safe. Replace the blanket zone_device_pages_have_same_pgmap() rejection with zone_device_pages_compatible(), while keeping zone_device_pages_have_same_pgmap() as a merge guard. Pages from different pgmaps can be added as separate bvec entries but must not be coalesced into the same segment, as that would make it impossible to recover the correct pgmap via page_pgmap(). 
Fixes: 49580e690755 ("block: add check when merging zone device pages") Cc: stable@vger.kernel.org Signed-off-by: Naman Jain Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260410153414.4159050-3-namjain@linux.microsoft.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio-integrity.c | 6 +++--- block/bio.c | 6 +++--- block/blk.h | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index bed26f1ec869a..12d887349c260 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -128,10 +128,10 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, if (bip->bip_vcnt > 0) { struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; - if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) + if (!zone_device_pages_compatible(bv->bv_page, page)) return 0; - - if (bvec_try_merge_hw_page(q, bv, page, len, offset)) { + if (zone_device_pages_have_same_pgmap(bv->bv_page, page) && + bvec_try_merge_hw_page(q, bv, page, len, offset)) { bip->bip_iter.bi_size += len; return len; } diff --git a/block/bio.c b/block/bio.c index b3a79285c278d..b184f0a3fcb2b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1030,10 +1030,10 @@ int bio_add_page(struct bio *bio, struct page *page, if (bio->bi_vcnt > 0) { struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; - if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) + if (!zone_device_pages_compatible(bv->bv_page, page)) return 0; - - if (bvec_try_merge_page(bv, page, len, offset)) { + if (zone_device_pages_have_same_pgmap(bv->bv_page, page) && + bvec_try_merge_page(bv, page, len, offset)) { bio->bi_iter.bi_size += len; return len; } diff --git a/block/blk.h b/block/blk.h index 06dfb5b670179..80a6a942a10af 100644 --- a/block/blk.h +++ b/block/blk.h @@ -139,6 +139,25 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, return true; } +/* + * Check if two pages from potentially different 
zone device pgmaps can + * coexist as separate bvec entries in the same bio. + * + * The block DMA iterator (blk_dma_map_iter_start) caches the P2PDMA mapping + * state from the first segment and applies it to all subsequent segments, so + * P2PDMA pages from different pgmaps must not be mixed in the same bio. + * + * Other zone device types (FS_DAX, GENERIC) use the same dma_map_phys() path + * as normal RAM. PRIVATE and COHERENT pages never appear in bios. + */ +static inline bool zone_device_pages_compatible(const struct page *a, + const struct page *b) +{ + if (is_pci_p2pdma_page(a) || is_pci_p2pdma_page(b)) + return zone_device_pages_have_same_pgmap(a, b); + return true; +} + static inline bool __bvec_gap_to_prev(const struct queue_limits *lim, struct bio_vec *bprv, unsigned int offset) { From 23b4b18bac285619c73dd79f6d6e1809c03891c7 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sat, 14 Mar 2026 16:12:24 -0500 Subject: [PATCH 099/554] iio: adc: ti-ads7950: use iio_push_to_buffers_with_ts_unaligned() commit 7806c060cceb2d6895efbb6cff2f2f17cf1ec5de upstream. Use iio_push_to_buffers_with_ts_unaligned() to avoid unaligned access when writing the timestamp in the rx_buf. The previous implementation would have been fine on architectures that support 4-byte alignment of 64-bit integers but could cause issues on architectures that require 8-byte alignment. 
Fixes: 902c4b2446d4 ("iio: adc: New driver for TI ADS7950 chips") Signed-off-by: David Lechner Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-ads7950.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index cdc6248895593..418452aaca810 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -47,8 +47,6 @@ #define TI_ADS7950_MAX_CHAN 16 #define TI_ADS7950_NUM_GPIOS 4 -#define TI_ADS7950_TIMESTAMP_SIZE (sizeof(int64_t) / sizeof(__be16)) - /* val = value, dec = left shift, bits = number of bits of the mask */ #define TI_ADS7950_EXTRACT(val, dec, bits) \ (((val) >> (dec)) & ((1 << (bits)) - 1)) @@ -105,8 +103,7 @@ struct ti_ads7950_state { * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - u16 rx_buf[TI_ADS7950_MAX_CHAN + 2 + TI_ADS7950_TIMESTAMP_SIZE] - __aligned(IIO_DMA_MINALIGN); + u16 rx_buf[TI_ADS7950_MAX_CHAN + 2] __aligned(IIO_DMA_MINALIGN); u16 tx_buf[TI_ADS7950_MAX_CHAN + 2]; u16 single_tx; u16 single_rx; @@ -313,8 +310,10 @@ static irqreturn_t ti_ads7950_trigger_handler(int irq, void *p) if (ret < 0) goto out; - iio_push_to_buffers_with_timestamp(indio_dev, &st->rx_buf[2], - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_ts_unaligned(indio_dev, &st->rx_buf[2], + sizeof(*st->rx_buf) * + TI_ADS7950_MAX_CHAN, + iio_get_time_ns(indio_dev)); out: mutex_unlock(&st->slock); From ea0697129807d718037f618221037aa0660ee3c5 Mon Sep 17 00:00:00 2001 From: Longxuan Yu Date: Sun, 12 Apr 2026 16:38:20 +0800 Subject: [PATCH 100/554] io_uring/poll: fix signed comparison in io_poll_get_ownership() commit 326941b22806cbf2df1fbfe902b7908b368cce42 upstream. 
io_poll_get_ownership() uses a signed comparison to check whether poll_refs has reached the threshold for the slowpath: if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) atomic_read() returns int (signed). When IO_POLL_CANCEL_FLAG (BIT(31)) is set in poll_refs, the value becomes negative in signed arithmetic, so the >= 128 comparison always evaluates to false and the slowpath is never taken. Fix this by casting the atomic_read() result to unsigned int before the comparison, so that the cancel flag is treated as a large positive value and correctly triggers the slowpath. Fixes: a26a35e9019f ("io_uring: make poll refs more robust") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Zhengchuan Liang Signed-off-by: Longxuan Yu Signed-off-by: Ren Wei Reviewed-by: Pavel Begunkov Link: https://patch.msgid.link/3a3508b08bcd7f1bc3beff848ae6e1d73d355043.1775965597.git.ylong030@ucr.edu Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index 869c7b2a08b64..2f2de20f3d34e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -93,7 +93,7 @@ static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { - if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + if (unlikely((unsigned int)atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } From 9678e1b7f512b9d376e14f3ca8c01dc71569a3c4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Apr 2026 13:24:33 -0600 Subject: [PATCH 101/554] io_uring/poll: ensure EPOLL_ONESHOT is propagated for EPOLL_URING_WAKE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 
1967f0b1cafdde37aa9e08e6021c14bcc484b7a5 upstream. Commit: aacf2f9f382c ("io_uring: fix req->apoll_events") fixed an issue where poll->events and req->apoll_events weren't synchronized, but then when the commit referenced in Fixes got added, it didn't ensure the same thing. If we mask in EPOLLONESHOT in the regular EPOLL_URING_WAKE path, then ensure it's done for both. Including a link to the original report below, even though it's mostly nonsense. But it includes a reproducer that does show that IORING_CQE_F_MORE is set in the previous CQE, while no more CQEs will be generated for this request. Just ignore anything that pretends this is security related in any way, it's just the typical AI nonsense. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/CAM0zi7yQzF3eKncgHo4iVM5yFLAjsiob_ucqyWKs=hyd_GqiMg@mail.gmail.com/ Reported-by: Azizcan Daştan Fixes: 4464853277d0 ("io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index 2f2de20f3d34e..a5e78747e63ab 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -412,8 +412,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * disable multishot as there is a circular dependency between * CQ posting and triggering the event. */ - if (mask & EPOLL_URING_WAKE) + if (mask & EPOLL_URING_WAKE) { poll->events |= EPOLLONESHOT; + req->apoll_events |= EPOLLONESHOT; + } /* optional, saves extra locking for removal in tw handler */ if (mask && poll->events & EPOLLONESHOT) { From 33df746736749a8899be6406dfc56df19c3113d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Apr 2026 08:17:20 +0200 Subject: [PATCH 102/554] ALSA: core: Fix potential data race at fasync handling commit 8146cd333d235ed32d48bb803fdf743472d7c783 upstream. 
In snd_fasync_work_fn(), which is the offload work for traversing and processing the pending fasync list, the call of kill_fasync() is done outside the snd_fasync_lock for avoiding deadlocks. The problem is that the references of fasync->on, fasync->signal and fasync->poll are done there also outside the lock. Since these may be modified by snd_kill_fasync() call concurrently from another process, inconsistent values might be passed to kill_fasync(). Although there shouldn't be critical UAF, it's still better to be addressed. This patch moves the kill_fasync() argument evaluations inside the snd_fasync_lock for avoiding the data races above. The handling in fasync->on flag is optimized in the loop to skip directly. Also, for more clarity, snd_fasync_free() takes the lock and unlinks the pending entry more directly instead of clearing fasync->on flag. Reported-by: Jake Lamberson Fixes: ef34a0ae7a26 ("ALSA: core: Add async signal helpers") Cc: Link: https://patch.msgid.link/20260420061721.3253644-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/misc.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/core/misc.c b/sound/core/misc.c index 88d9e1f9a6e9f..5aca09edf9718 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c @@ -100,14 +100,18 @@ static LIST_HEAD(snd_fasync_list); static void snd_fasync_work_fn(struct work_struct *work) { struct snd_fasync *fasync; + int signal, poll; spin_lock_irq(&snd_fasync_lock); while (!list_empty(&snd_fasync_list)) { fasync = list_first_entry(&snd_fasync_list, struct snd_fasync, list); list_del_init(&fasync->list); + if (!fasync->on) + continue; + signal = fasync->signal; + poll = fasync->poll; spin_unlock_irq(&snd_fasync_lock); - if (fasync->on) - kill_fasync(&fasync->fasync, fasync->signal, fasync->poll); + kill_fasync(&fasync->fasync, signal, poll); spin_lock_irq(&snd_fasync_lock); } spin_unlock_irq(&snd_fasync_lock); @@ -158,7 +162,10 @@ void 
snd_fasync_free(struct snd_fasync *fasync) { if (!fasync) return; - fasync->on = 0; + + scoped_guard(spinlock_irq, &snd_fasync_lock) + list_del_init(&fasync->list); + flush_work(&snd_fasync_work); kfree(fasync); } From 30bd9ed68c84f060319c7edea9ab7f7b64565ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Fri, 17 Apr 2026 10:41:33 -0300 Subject: [PATCH 103/554] ALSA: caiaq: Fix control_put() result and cache rollback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a3542d1b30f92307f545f2def14e8d988dffdff0 upstream. control_put() always returns 1 and updates cdev->control_state[] before sending the USB command. It also ignores transport errors from usb_bulk_msg(), snd_usb_caiaq_send_command(), and snd_usb_caiaq_send_command_bank(). That breaks the ALSA .put() contract and can leave control_get() reporting a cached value the device never accepted. Return 0 for unchanged values, propagate transport failures, and restore the cached byte when the write fails. 
Fixes: 8e3cd08ed8e59 ("[ALSA] caiaq - add control API and more input features") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260417-caiaq-control-put-v1-1-c37826e92447@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/control.c | 52 +++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/sound/usb/caiaq/control.c b/sound/usb/caiaq/control.c index af459c49baf4c..4598fb7e8be02 100644 --- a/sound/usb/caiaq/control.c +++ b/sound/usb/caiaq/control.c @@ -87,6 +87,7 @@ static int control_put(struct snd_kcontrol *kcontrol, struct snd_usb_caiaqdev *cdev = caiaqdev(chip->card); int pos = kcontrol->private_value; int v = ucontrol->value.integer.value[0]; + int ret; unsigned char cmd; switch (cdev->chip.usb_id) { @@ -103,6 +104,10 @@ static int control_put(struct snd_kcontrol *kcontrol, if (pos & CNT_INTVAL) { int i = pos & ~CNT_INTVAL; + unsigned char old = cdev->control_state[i]; + + if (old == v) + return 0; cdev->control_state[i] = v; @@ -113,10 +118,11 @@ static int control_put(struct snd_kcontrol *kcontrol, cdev->ep8_out_buf[0] = i; cdev->ep8_out_buf[1] = v; - usb_bulk_msg(cdev->chip.dev, - usb_sndbulkpipe(cdev->chip.dev, 8), - cdev->ep8_out_buf, sizeof(cdev->ep8_out_buf), - &actual_len, 200); + ret = usb_bulk_msg(cdev->chip.dev, + usb_sndbulkpipe(cdev->chip.dev, 8), + cdev->ep8_out_buf, + sizeof(cdev->ep8_out_buf), + &actual_len, 200); } else if (cdev->chip.usb_id == USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER)) { @@ -128,21 +134,36 @@ static int control_put(struct snd_kcontrol *kcontrol, offset = MASCHINE_BANK_SIZE; } - snd_usb_caiaq_send_command_bank(cdev, cmd, bank, - cdev->control_state + offset, - MASCHINE_BANK_SIZE); + ret = snd_usb_caiaq_send_command_bank(cdev, cmd, bank, + cdev->control_state + offset, + MASCHINE_BANK_SIZE); } else { - snd_usb_caiaq_send_command(cdev, cmd, - cdev->control_state, 
sizeof(cdev->control_state)); + ret = snd_usb_caiaq_send_command(cdev, cmd, + cdev->control_state, + sizeof(cdev->control_state)); + } + + if (ret < 0) { + cdev->control_state[i] = old; + return ret; } } else { - if (v) - cdev->control_state[pos / 8] |= 1 << (pos % 8); - else - cdev->control_state[pos / 8] &= ~(1 << (pos % 8)); + int idx = pos / 8; + unsigned char mask = 1 << (pos % 8); + unsigned char old = cdev->control_state[idx]; + unsigned char val = v ? (old | mask) : (old & ~mask); - snd_usb_caiaq_send_command(cdev, cmd, - cdev->control_state, sizeof(cdev->control_state)); + if (old == val) + return 0; + + cdev->control_state[idx] = val; + ret = snd_usb_caiaq_send_command(cdev, cmd, + cdev->control_state, + sizeof(cdev->control_state)); + if (ret < 0) { + cdev->control_state[idx] = old; + return ret; + } } return 1; @@ -640,4 +661,3 @@ int snd_usb_caiaq_control_init(struct snd_usb_caiaqdev *cdev) return ret; } - From e59ecd4ee3a450db6cb4e4ecaa3efdd593f80056 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Apr 2026 12:59:00 +0200 Subject: [PATCH 104/554] ALSA: caiaq: Handle probe errors properly commit 28abd224db4a49560b452115bca3672a20e45b2f upstream. The probe procedure of setup_card() in caiaq driver doesn't treat the error cases gracefully, e.g. the error from snd_card_register() calls snd_card_free() but continues. This would lead to a UAF for the further calls like snd_usb_caiaq_control_init(), as Berk suggested in another patch in the link below. However, the problem is not only that; in general, this function drops the all error handlings (as it's a void function) although its caller can propagate an error to snd_probe(), which eventually calls snd_card_free() as a proper error path. That said, we should treat each error case in setup_card(), and just return the error code promptly, which is then handled later as a fatal error in snd_probe(). This patch achieves it by changing the setup_card() to return an error code. 
Also, the superfluous snd_card_free() call is removed, too. Note that card->private_free can be set still safely at returning an error. All called functions in card_free() have checks of the unassigned resources or NULL checks. Fixes: 8e3cd08ed8e5 ("[ALSA] caiaq - add control API and more input features") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20260413034941.1131465-2-berkcgoksel@gmail.com Link: https://patch.msgid.link/20260414105916.364073-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/device.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 51177ebfb8c62..8af0c04041ee3 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -290,7 +290,7 @@ int snd_usb_caiaq_set_auto_msg(struct snd_usb_caiaqdev *cdev, tmp, sizeof(tmp)); } -static void setup_card(struct snd_usb_caiaqdev *cdev) +static int setup_card(struct snd_usb_caiaqdev *cdev) { int ret; char val[4]; @@ -325,8 +325,10 @@ static void setup_card(struct snd_usb_caiaqdev *cdev) snd_usb_caiaq_send_command(cdev, EP1_CMD_READ_IO, NULL, 0); if (!wait_event_timeout(cdev->ep1_wait_queue, - cdev->control_state[0] != 0xff, HZ)) - return; + cdev->control_state[0] != 0xff, HZ)) { + dev_err(dev, "Read timeout for control state\n"); + return -EINVAL; + } /* fix up some defaults */ if ((cdev->control_state[1] != 2) || @@ -347,33 +349,43 @@ static void setup_card(struct snd_usb_caiaqdev *cdev) cdev->spec.num_digital_audio_out + cdev->spec.num_digital_audio_in > 0) { ret = snd_usb_caiaq_audio_init(cdev); - if (ret < 0) + if (ret < 0) { dev_err(dev, "Unable to set up audio system (ret=%d)\n", ret); + return ret; + } } if (cdev->spec.num_midi_in + cdev->spec.num_midi_out > 0) { ret = snd_usb_caiaq_midi_init(cdev); - if (ret < 0) + if (ret < 0) { dev_err(dev, "Unable to set up MIDI system (ret=%d)\n", ret); + return ret; + } } #ifdef 
CONFIG_SND_USB_CAIAQ_INPUT ret = snd_usb_caiaq_input_init(cdev); - if (ret < 0) + if (ret < 0) { dev_err(dev, "Unable to set up input system (ret=%d)\n", ret); + return ret; + } #endif /* finally, register the card and all its sub-instances */ ret = snd_card_register(cdev->chip.card); if (ret < 0) { dev_err(dev, "snd_card_register() returned %d\n", ret); - snd_card_free(cdev->chip.card); + return ret; } ret = snd_usb_caiaq_control_init(cdev); - if (ret < 0) + if (ret < 0) { dev_err(dev, "Unable to set up control system (ret=%d)\n", ret); + return ret; + } + + return 0; } static void card_free(struct snd_card *card) @@ -499,8 +511,11 @@ static int init_card(struct snd_usb_caiaqdev *cdev) scnprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); - setup_card(cdev); card->private_free = card_free; + err = setup_card(cdev); + if (err < 0) + return err; + return 0; err_kill_urb: From 1ac67f49ae2cd6ba590b80c5b2a0aef7ac580d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Thu, 16 Apr 2026 10:24:40 -0300 Subject: [PATCH 105/554] ALSA: 6fire: Fix input volume change detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dc88eef8f55e85e92d016cdf7e291f5560efd79b upstream. usb6fire_control_input_vol_put() stores the analog capture volume as a signed offset in rt->input_vol[] (-15..+15), but it compares the cached value against the user-visible mixer value (0..30) before subtracting 15. This mixes two domains in the change detection path. Since the runtime is zero-initialized, the visible default is 15; writing 0 right after probe is ignored, while writing 15 is reported as a change even though the cached value remains 0. Normalize the user value before comparing it with the cached offset. 
Fixes: 06bb4e743501 ("ALSA: snd-usb-6fire: add analog input volume control") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260416-alsa-6fire-input-volume-change-detection-v1-1-ec78299168df@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/control.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/usb/6fire/control.c b/sound/usb/6fire/control.c index 9bd8dcbb68e41..7c2274120c765 100644 --- a/sound/usb/6fire/control.c +++ b/sound/usb/6fire/control.c @@ -290,15 +290,17 @@ static int usb6fire_control_input_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct control_runtime *rt = snd_kcontrol_chip(kcontrol); + int vol0 = ucontrol->value.integer.value[0] - 15; + int vol1 = ucontrol->value.integer.value[1] - 15; int changed = 0; - if (rt->input_vol[0] != ucontrol->value.integer.value[0]) { - rt->input_vol[0] = ucontrol->value.integer.value[0] - 15; + if (rt->input_vol[0] != vol0) { + rt->input_vol[0] = vol0; rt->ivol_updated &= ~(1 << 0); changed = 1; } - if (rt->input_vol[1] != ucontrol->value.integer.value[1]) { - rt->input_vol[1] = ucontrol->value.integer.value[1] - 15; + if (rt->input_vol[1] != vol1) { + rt->input_vol[1] = vol1; rt->ivol_updated &= ~(1 << 1); changed = 1; } From 89ed38540e6be0c956af3dd980df36c7c9a0e3e2 Mon Sep 17 00:00:00 2001 From: Spencer Payton Date: Tue, 21 Apr 2026 10:49:18 +0200 Subject: [PATCH 106/554] ALSA: hda/realtek - Add mute LED support for HP Victus 15-fa2xxx commit eacda758e3c01db98b5c231f56cf9a6e05ced75c upstream. The mute LED on this laptop uses ALC245 but requires a quirk to work. This patch enables the existing ALC245_FIXUP_HP_MUTE_LED_COEFBIT quirk for the device. Tested my Victus 15-fa2xxx (PCI SSID 103c:8dcd). The LED behaviour works as intended. 
Cc: stable@vger.kernel.org Signed-off-by: Spencer Payton Link: https://patch.msgid.link/20260421084918.14685-1-spayton681@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 8e8ba928d3c02..622cd353bf25c 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6967,6 +6967,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dcd, "HP Victus 15-fa2xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8d9b, "HP 17 Turbine OmniBook 7 UMA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8d9c, "HP 17 Turbine OmniBook 7 DIS", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8d9d, "HP 17 Turbine OmniBook X UMA", ALC287_FIXUP_CS35L41_I2C_2), From 91c8e75ba1f162db50ade4151a9068f45a197986 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Thu, 16 Apr 2026 03:31:38 +0800 Subject: [PATCH 107/554] ALSA: pcmtest: fix reference leak on failed device registration commit 4ff036f95238f02c87e5d7c0a9d93748582a8950 upstream. When platform_device_register() fails in mod_init(), the embedded struct device in pcmtst_pdev has already been initialized by device_initialize(), but the failure path returns the error without dropping the device reference for the current platform device: mod_init() -> platform_device_register(&pcmtst_pdev) -> device_initialize(&pcmtst_pdev.dev) -> setup_pdev_dma_masks(&pcmtst_pdev) -> platform_device_add(&pcmtst_pdev) This leads to a reference leak when platform_device_register() fails. Fix this by calling platform_device_put() before returning the error. 
The issue was identified by a static analysis tool I developed and confirmed by manual review. Fixes: 315a3d57c64c5 ("ALSA: Implement the new Virtual PCM Test Driver") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260415193138.3861297-1-lgs201920130244@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/pcmtest.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c index 19b3f306c5645..c5b78859f227d 100644 --- a/sound/drivers/pcmtest.c +++ b/sound/drivers/pcmtest.c @@ -756,8 +756,10 @@ static int __init mod_init(void) if (err) return err; err = platform_device_register(&pcmtst_pdev); - if (err) + if (err) { + platform_device_put(&pcmtst_pdev); return err; + } err = platform_driver_register(&pcmtst_pdrv); if (err) platform_device_unregister(&pcmtst_pdev); From 54af1a51bb8b9d3c116d7f4913dd01cf79457dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Tue, 21 Apr 2026 10:03:06 -0300 Subject: [PATCH 108/554] ALSA: pcmtest: Fix resource leaks in module init error paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d5d5f80416a3a749906c04d56575e2290792654b upstream. pcmtest allocates its pattern buffers and creates its debugfs tree before registering the platform device and driver, but mod_init() does not release those resources when a later init step fails. As a result, a debugfs directory creation failure leaks the pattern buffers, while platform_device_register() and platform_driver_register() failures leave both the pattern buffers and the debugfs tree behind. The recent fix for failed device registration only dropped the embedded device reference. Add the missing cleanup for the debugfs tree and pattern buffers in the remaining module init error paths. 
Fixes: 315a3d57c64c ("ALSA: Implement the new Virtual PCM Test Driver") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260421-alsa-pcmtest-init-unwind-v1-1-03fe0c423dbb@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/pcmtest.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c index c5b78859f227d..58fef1250f651 100644 --- a/sound/drivers/pcmtest.c +++ b/sound/drivers/pcmtest.c @@ -754,15 +754,24 @@ static int __init mod_init(void) err = init_debug_files(buf_allocated); if (err) - return err; + goto err_free_patterns; err = platform_device_register(&pcmtst_pdev); if (err) { platform_device_put(&pcmtst_pdev); - return err; + goto err_clear_debug; } err = platform_driver_register(&pcmtst_pdrv); - if (err) + if (err) { platform_device_unregister(&pcmtst_pdev); + goto err_clear_debug; + } + + return 0; + +err_clear_debug: + clear_debug_files(); +err_free_patterns: + free_pattern_buffers(); return err; } From 4ca3746d9492da660b28638bc262b73dd742af21 Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Mon, 23 Feb 2026 08:59:26 -0300 Subject: [PATCH 109/554] iio: adc: ad7768-1: fix one-shot mode data acquisition commit 8be19e233744961db6069da9c9ab63eb085a0447 upstream. According to the datasheet, one-shot mode requires a SYNC_IN pulse to trigger a new sample conversion. In the current implementation, No sync pulse was sent after switching to one-shot mode and reinit_completion() was called before mode switching, creating a race condition where spurious interrupts during mode change could trigger completion prematurely. Fix by sending a sync pulse after configuring one-shot mode and reinit_completion() to ensure it only waits for the actual conversion completion. 
Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support") Signed-off-by: Jonathan Santos Reviewed-by: David Lechner Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7768-1.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 872c88d0c86ca..25d7a701f3adb 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -389,12 +389,17 @@ static int ad7768_scan_direct(struct iio_dev *indio_dev) struct ad7768_state *st = iio_priv(indio_dev); int readval, ret; - reinit_completion(&st->completion); - ret = ad7768_set_mode(st, AD7768_ONE_SHOT); if (ret < 0) return ret; + reinit_completion(&st->completion); + + /* One-shot mode requires a SYNC pulse to generate a new sample */ + ret = ad7768_send_sync_pulse(st); + if (ret) + return ret; + ret = wait_for_completion_timeout(&st->completion, msecs_to_jiffies(1000)); if (!ret) From d42d5404782fe4cd6617b01e45f197baa802f64a Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Mon, 23 Feb 2026 08:59:35 -0300 Subject: [PATCH 110/554] iio: adc: ad7768-1: remove switch to one-shot mode commit 81fdc3127d013a552465c3bf9829afbed5184406 upstream. wideband low ripple FIR Filter is not available in one-shot mode. In order to make direct reads work for all filter options, remove the switch for one-shot mode and guarantee device is always in continuous conversion mode. 
Fixes: fb1d3b24ebf5 ("iio: adc: ad7768-1: add filter type and oversampling ratio attributes") Signed-off-by: Jonathan Santos Reviewed-by: David Lechner Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7768-1.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 25d7a701f3adb..45d2cb77d5eca 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -389,17 +389,8 @@ static int ad7768_scan_direct(struct iio_dev *indio_dev) struct ad7768_state *st = iio_priv(indio_dev); int readval, ret; - ret = ad7768_set_mode(st, AD7768_ONE_SHOT); - if (ret < 0) - return ret; - reinit_completion(&st->completion); - /* One-shot mode requires a SYNC pulse to generate a new sample */ - ret = ad7768_send_sync_pulse(st); - if (ret) - return ret; - ret = wait_for_completion_timeout(&st->completion, msecs_to_jiffies(1000)); if (!ret) @@ -418,14 +409,6 @@ static int ad7768_scan_direct(struct iio_dev *indio_dev) if (st->oversampling_ratio == 8) readval >>= 8; - /* - * Any SPI configuration of the AD7768-1 can only be - * performed in continuous conversion mode. - */ - ret = ad7768_set_mode(st, AD7768_CONTINUOUS); - if (ret < 0) - return ret; - return readval; } @@ -1025,6 +1008,10 @@ static int ad7768_setup(struct iio_dev *indio_dev) return ret; } + ret = ad7768_set_mode(st, AD7768_CONTINUOUS); + if (ret) + return ret; + /* For backwards compatibility, try the adi,sync-in-gpios property */ st->gpio_sync_in = devm_gpiod_get_optional(&st->spi->dev, "adi,sync-in", GPIOD_OUT_LOW); From 996b0487b3cdda4c91811dbb1c9564626bc840bd Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Apr 2026 17:14:32 +0100 Subject: [PATCH 111/554] rxrpc: Fix potential UAF after skb_unshare() failure commit 1f2740150f904bfa60e4bad74d65add3ccb5e7f8 upstream. 
If skb_unshare() fails to unshare a packet due to allocation failure in rxrpc_input_packet(), the skb pointer in the parent (rxrpc_io_thread()) will be NULL'd out. This will likely cause the call to trace_rxrpc_rx_done() to oops. Fix this by moving the unsharing down to where rxrpc_input_call_event() calls rxrpc_input_call_packet(). There are a number of places prior to that where we ignore DATA packets for a variety of reasons (such as the call already being complete) for which an unshare is then avoided. And with that, rxrpc_input_packet() doesn't need to take a pointer to the pointer to the packet, so change that to just a pointer. Fixes: 2d1faf7a0ca3 ("rxrpc: Simplify skbuff accounting in receive path") Closes: https://sashiko.dev/#/patchset/20260408121252.2249051-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260422161438.2593376-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rxrpc.h | 4 ++-- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_event.c | 19 ++++++++++++++++++- net/rxrpc/io_thread.c | 24 ++---------------------- net/rxrpc/skbuff.c | 9 --------- 5 files changed, 22 insertions(+), 35 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 578b8038b2117..8d77828b75155 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -161,8 +161,6 @@ E_(rxrpc_call_poke_timer_now, "Timer-now") #define rxrpc_skb_traces \ - EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ - EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ @@ -189,6 +187,7 @@ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ EM(rxrpc_skb_put_response, "PUT 
response ") \ + EM(rxrpc_skb_put_response_copy, "PUT resp-cpy ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ EM(rxrpc_skb_see_conn_work, "SEE conn-work") \ @@ -197,6 +196,7 @@ EM(rxrpc_skb_see_recvmsg_oob, "SEE recvm-oob") \ EM(rxrpc_skb_see_reject, "SEE reject ") \ EM(rxrpc_skb_see_rotate, "SEE rotate ") \ + EM(rxrpc_skb_see_unshare_nomem, "SEE unshar-nm") \ E_(rxrpc_skb_see_version, "SEE version ") #define rxrpc_local_traces \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 96ecb83c90715..27c2aa2dd023c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1486,7 +1486,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fec59d9338b9f..cc8f9dfa44e8a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -332,7 +332,24 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - rxrpc_input_call_packet(call, skb); + if (sp->hdr.securityIndex != 0 && + skb_cloned(skb)) { + /* Unshare the packet so that it can be + * modified by in-place decryption. + */ + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + + if (nskb) { + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); + rxrpc_input_call_packet(call, nskb); + rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); + } else { + /* OOM - Drop the packet. 
*/ + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); + } + } else { + rxrpc_input_call_packet(call, skb); + } rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); did_receive = true; } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 6979569319252..dc5184a2fa9d1 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -192,13 +192,12 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; - struct sk_buff *skb = *_skb; bool ret = false; skb_pull(skb, sizeof(struct udphdr)); @@ -244,25 +243,6 @@ static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); if (sp->hdr.seq == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); - - /* Unshare the packet so that it can be modified for in-place - * decryption. 
- */ - if (sp->hdr.securityIndex != 0) { - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); - *_skb = NULL; - return just_discard; - } - - if (skb != *_skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); - *_skb = skb; - rxrpc_new_skb(skb, rxrpc_skb_new_unshared); - sp = rxrpc_skb(skb); - } - } break; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -494,7 +474,7 @@ int rxrpc_io_thread(void *data) switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - if (!rxrpc_input_packet(local, &skb)) + if (!rxrpc_input_packet(local, skb)) rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 3bcd6ee803960..e2169d1a14b5f 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -46,15 +46,6 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) skb_get(skb); } -/* - * Note the dropping of a ref on a socket buffer by the core. - */ -void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) -{ - int n = atomic_inc_return(&rxrpc_n_rx_skbs); - trace_rxrpc_skb(skb, 0, n, why); -} - /* * Note the destruction of a socket buffer. */ From 861b9a0a1823bf064a7b810d29502a9ef043f40f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Apr 2026 17:14:30 +0100 Subject: [PATCH 112/554] rxrpc: Fix memory leaks in rxkad_verify_response() commit 34f61a07e0cdefaecd3ec03bb5fb22215643678f upstream. Fix rxkad_verify_response() to free the ticket and the server key under all circumstances by initialising the ticket pointer to NULL and then making all paths through the function after the first allocation has been done go through a single common epilogue that just releases everything - where all the releases skip on a NULL pointer. 
Fixes: 57af281e5389 ("rxrpc: Tidy up abort generation infrastructure") Fixes: ec832bd06d6f ("rxrpc: Don't retain the server key in the connection") Closes: https://sashiko.dev/#/patchset/20260408121252.2249051-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260422161438.2593376-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/rxkad.c | 103 +++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 4a3c630941499..4e04e5acf6eed 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1136,7 +1136,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_crypt session_key; struct key *server_key; time64_t expiry; - void *ticket; + void *ticket = NULL; u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1162,13 +1162,13 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = -ENOMEM; response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); if (!response) - goto temporary_error; + goto error; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), response, sizeof(*response)) < 0) { - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, - rxkad_abort_resp_short); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); + goto error; } version = ntohl(response->version); @@ -1178,62 +1178,62 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); if (version != RXKAD_VERSION) { - rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, - rxkad_abort_resp_version); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); + 
goto error; } if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) { - rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, - rxkad_abort_resp_tkt_len); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, + rxkad_abort_resp_tkt_len); + goto error; } if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { - rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, - rxkad_abort_resp_unknown_tkt); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, + rxkad_abort_resp_unknown_tkt); + goto error; } /* extract the kerberos ticket and decrypt and decode it */ ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - goto temporary_error_free_resp; + goto error; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), ticket, ticket_len) < 0) { - rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, - rxkad_abort_resp_short_tkt); - goto protocol_error; + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto error; } ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, &session_key, &expiry); if (ret < 0) - goto temporary_error_free_ticket; + goto error; /* use the session key from inside the ticket to decrypt the * response */ ret = rxkad_decrypt_response(conn, response, &session_key); if (ret < 0) - goto temporary_error_free_ticket; + goto error; if (ntohl(response->encrypted.epoch) != conn->proto.epoch || ntohl(response->encrypted.cid) != conn->proto.cid || ntohl(response->encrypted.securityIndex) != conn->security_ix) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_param); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_param); + goto error; } csum = response->encrypted.checksum; response->encrypted.checksum = 0; rxkad_calc_response_checksum(response); if (response->encrypted.checksum != csum) { - rxrpc_abort_conn(conn, skb, 
RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_checksum); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_checksum); + goto error; } for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -1241,38 +1241,38 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, u32 counter = READ_ONCE(conn->channels[i].call_counter); if (call_id > INT_MAX) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_bad_callid); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_callid); + goto error; } if (call_id < counter) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, - rxkad_abort_resp_call_ctr); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_ctr); + goto error; } if (call_id > counter) { if (conn->channels[i].call) { - rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + ret = rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_call_state); - goto protocol_error_free; + goto error; } conn->channels[i].call_counter = call_id; } } if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { - rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, - rxkad_abort_resp_ooseq); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, + rxkad_abort_resp_ooseq); + goto error; } level = ntohl(response->encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) { - rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, - rxkad_abort_resp_level); - goto protocol_error_free; + ret = rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, + rxkad_abort_resp_level); + goto error; } conn->security_level = level; @@ -1280,31 +1280,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * this the connection security can be handled in exactly the same way * as for a client connection */ ret = 
rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); - if (ret < 0) - goto temporary_error_free_ticket; - - kfree(ticket); - kfree(response); - _leave(" = 0"); - return 0; -protocol_error_free: - kfree(ticket); -protocol_error: - kfree(response); - key_put(server_key); - return -EPROTO; - -temporary_error_free_ticket: +error: kfree(ticket); -temporary_error_free_resp: kfree(response); -temporary_error: - /* Ignore the response packet if we got a temporary error such as - * ENOMEM. We just want to send the challenge again. Note that we - * also come out this way if the ticket decryption fails. - */ key_put(server_key); + _leave(" = %d", ret); return ret; } From ca71ac2de389b01eecdc48bfafbdf073ec232044 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Apr 2026 17:14:33 +0100 Subject: [PATCH 113/554] rxrpc: Fix conn-level packet handling to unshare RESPONSE packets commit 24481a7f573305706054c59e275371f8d0fe919f upstream. The security operations that verify the RESPONSE packets decrypt bits of it in place - however, the sk_buff may be shared with a packet sniffer, which would lead to the sniffer seeing an apparently corrupt packet (actually decrypted). Fix this by handing a copy of the packet off to the specific security handler if the packet was cloned. 
Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Closes: https://sashiko.dev/#/patchset/20260408121252.2249051-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260422161438.2593376-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/conn_event.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9a41ec708aeb9..aee977291d90b 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -240,6 +240,33 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) rxrpc_notify_socket(call); } +static int rxrpc_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + int ret; + + if (skb_cloned(skb)) { + /* Copy the packet if shared so that we can do in-place + * decryption. + */ + struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); + + if (nskb) { + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); + ret = conn->security->verify_response(conn, nskb); + rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); + } else { + /* OOM - Drop the packet. 
*/ + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); + ret = -ENOMEM; + } + } else { + ret = conn->security->verify_response(conn, skb); + } + + return ret; +} + /* * connection-level Rx packet processor */ @@ -270,7 +297,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, } spin_unlock_irq(&conn->state_lock); - ret = conn->security->verify_response(conn, skb); + ret = rxrpc_verify_response(conn, skb); if (ret < 0) return ret; From f0d3efd03b2a9e0f1ffa6df8fcb264af3d494286 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Apr 2026 17:14:31 +0100 Subject: [PATCH 114/554] rxrpc: Fix rxkad crypto unalignment handling commit def304aae2edf321d2671fd6ca766a93c21f877e upstream. Fix handling of a packet with a misaligned crypto length. Also handle non-ENOMEM errors from decryption by aborting. Further, remove the WARN_ON_ONCE() so that it can't be remotely triggered (a trace line can still be emitted). Fixes: f93af41b9f5f ("rxrpc: Fix missing error checks for rxkad encryption/decryption failure") Closes: https://sashiko.dev/#/patchset/20260408121252.2249051-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260422161438.2593376-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rxrpc.h | 1 + net/rxrpc/rxkad.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 8d77828b75155..13b9d017f8e17 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -37,6 +37,7 @@ EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \ EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \ EM(rxkad_abort_2_short_check, "rxkad2-short-check") \ + EM(rxkad_abort_2_crypto_unaligned, "rxkad2-crypto-unaligned") \ EM(rxkad_abort_2_short_data, "rxkad2-short-data") \ 
EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \ EM(rxkad_abort_2_short_len, "rxkad2-short-len") \ diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 4e04e5acf6eed..af24d54b6b4fc 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -510,6 +510,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_header); + /* Don't let the crypto algo see a misaligned length. */ + sp->len = round_down(sp->len, 8); + /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ @@ -543,8 +546,10 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, if (sg != _sg) kfree(sg); if (ret < 0) { - WARN_ON_ONCE(ret != -ENOMEM); - return ret; + if (ret == -ENOMEM) + return ret; + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_crypto_unaligned); } /* Extract the decrypted packet length */ From 293095ef618818852bac5488c1bc223935e2ca17 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Apr 2026 21:09:08 +0100 Subject: [PATCH 115/554] rxrpc: Fix error handling in rxgk_extract_token() commit 3476c8bb960f48e49355d6f93fb7673211e0163f upstream. Fix a missing bit of error handling in rxgk_extract_token(): in the event that rxgk_decrypt_skb() returns -ENOMEM, it should just return that rather than continuing on (for anything else, it generates an abort). 
Fixes: 64863f4ca494 ("rxrpc: Fix unhandled errors in rxgk_verify_packet_integrity()") Closes: https://sashiko.dev/#/patchset/20260422161438.2593376-4-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260423200909.3049438-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/rxgk_app.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c index 30275cb5ba3e2..30b6701a98670 100644 --- a/net/rxrpc/rxgk_app.c +++ b/net/rxrpc/rxgk_app.c @@ -245,6 +245,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, if (ret != -ENOMEM) return rxrpc_abort_conn(conn, skb, ec, ret, rxgk_abort_resp_tok_dec); + return ret; } ret = conn->security->default_decode_ticket(conn, skb, ticket_offset, From 76cb9a2d252274adfae6e293a292434631a7d472 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Apr 2026 21:09:07 +0100 Subject: [PATCH 116/554] rxrpc: Fix re-decryption of RESPONSE packets commit 0422e7a4883f25101903f3e8105c0808aa5f4ce9 upstream. If a RESPONSE packet gets a temporary failure during processing, it may end up in a partially decrypted state - and then get requeued for a retry. Fix this by just discarding the packet; we will send another CHALLENGE packet and thereby elicit a further response. Similarly, discard an incoming CHALLENGE packet if we get an error whilst generating a RESPONSE; the server will send another CHALLENGE. 
Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Closes: https://sashiko.dev/#/patchset/20260422161438.2593376-4-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260423200909.3049438-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rxrpc.h | 1 - net/rxrpc/conn_event.c | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 13b9d017f8e17..573f2df3a2c99 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -285,7 +285,6 @@ EM(rxrpc_conn_put_unidle, "PUT unidle ") \ EM(rxrpc_conn_put_work, "PUT work ") \ EM(rxrpc_conn_queue_challenge, "QUE chall ") \ - EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aee977291d90b..a2130d25aaa9b 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -389,7 +389,6 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force) static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { struct sk_buff *skb; - int ret; if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -398,17 +397,8 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); - ret = rxrpc_process_event(conn, skb); - switch (ret) { - case -ENOMEM: - case -EAGAIN: - skb_queue_head(&conn->rx_queue, skb); - rxrpc_queue_conn(conn, 
rxrpc_conn_queue_retry_work); - break; - default: - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); - break; - } + rxrpc_process_event(conn, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); } } From 761c37b761ede21102c067205b6053854a6d2b99 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Apr 2026 21:09:06 +0100 Subject: [PATCH 117/554] rxrpc: Fix rxrpc_input_call_event() to only unshare DATA packets commit 55b2984c96c37f909bbfe8851f13152693951382 upstream. Fix rxrpc_input_call_event() to only unshare DATA packets and not ACK, ABORT, etc.. And with that, rxrpc_input_packet() doesn't need to take a pointer to the pointer to the packet, so change that to just a pointer. Fixes: 1f2740150f90 ("rxrpc: Fix potential UAF after skb_unshare() failure") Closes: https://sashiko.dev/#/patchset/20260422161438.2593376-4-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260423200909.3049438-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cc8f9dfa44e8a..fdd683261226c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -332,7 +332,8 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (sp->hdr.securityIndex != 0 && + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.securityIndex != 0 && skb_cloned(skb)) { /* Unshare the packet so that it can be * modified by in-place decryption. From ff591df2cd7f3216fbe87f3112bf686176e0300f Mon Sep 17 00:00:00 2001 From: Prasanna Kumar T S M Date: Sun, 22 Mar 2026 06:11:39 -0700 Subject: [PATCH 118/554] EDAC/versalnet: Fix memory leak in remove and probe error paths commit 1b6f292cb94d95c9bc22e1efe592daf62c60bc2e upstream. 
The mcdi object allocated using kzalloc() in the setup_mcdi() is not freed in the remove path or in probe's error handling path leading to a memory leak. Fix it by freeing the allocated memory. Fixes: d5fe2fec6c40d ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Prasanna Kumar T S M Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260322131139.1684716-1-ptsm@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/versalnet_edac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index dce08dc7f32e7..f90723bc93d5c 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -917,6 +917,7 @@ static int mc_probe(struct platform_device *pdev) err_init: cdx_mcdi_finish(priv->mcdi); + kfree(priv->mcdi); err_unreg: unregister_rpmsg_driver(&amd_rpmsg_driver); @@ -938,6 +939,7 @@ static void mc_remove(struct platform_device *pdev) remove_versalnet(priv); rproc_shutdown(priv->mcdi->r5_rproc); cdx_mcdi_finish(priv->mcdi); + kfree(priv->mcdi); } static const struct of_device_id amd_edac_match[] = { From 491344b826a58f122a851ec792fed20ae3e2318d Mon Sep 17 00:00:00 2001 From: Yiyang Chen Date: Mon, 30 Mar 2026 03:00:41 +0800 Subject: [PATCH 119/554] tools/accounting: handle truncated taskstats netlink messages commit cc82b3dcc6a8fa259fbda12ab00d6fc00908a49e upstream. procacct and getdelays use a fixed receive buffer for taskstats generic netlink messages. A multi-threaded process exit can emit a single PID+TGID notification large enough to exceed that buffer on newer kernels. Switch to recvmsg() so MSG_TRUNC is detected explicitly, increase the message buffer size, and report truncated datagrams clearly instead of misparsing them as fatal netlink errors. Also print the taskstats version in debug output to make version mismatches easier to diagnose while inspecting taskstats traffic. 
Link: https://lkml.kernel.org/r/520308bb4cbbaf8dc2c7296b5f60f11e12fb30a5.1774810498.git.cyyzero16@gmail.com Signed-off-by: Yiyang Chen Cc: Balbir Singh Cc: Dr. Thomas Orgis Cc: Fan Yu Cc: Wang Yaxin Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/accounting/getdelays.c | 41 ++++++++++++++++++++++++++++++++---- tools/accounting/procacct.c | 40 +++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 21cb3c3d1331f..83bc5f722d029 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -59,7 +59,7 @@ int print_task_context_switch_counts; } /* Maximum size of response requested or message sent */ -#define MAX_MSG_SIZE 1024 +#define MAX_MSG_SIZE 2048 /* Maximum number of cpus expected to be specified in a cpumask */ #define MAX_CPUS 32 @@ -114,6 +114,32 @@ static int create_nl_socket(int protocol) return -1; } +static int recv_taskstats_msg(int sd, struct msgtemplate *msg) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + .iov_base = msg, + .iov_len = sizeof(*msg), + }; + struct msghdr hdr = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int ret; + + ret = recvmsg(sd, &hdr, 0); + if (ret < 0) + return -1; + if (hdr.msg_flags & MSG_TRUNC) { + errno = EMSGSIZE; + return -1; + } + + return ret; +} + static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, __u8 genl_cmd, __u16 nla_type, @@ -515,12 +541,16 @@ int main(int argc, char *argv[]) } do { - rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + rep_len = recv_taskstats_msg(nl_sd, &msg); PRINTF("received %d bytes\n", rep_len); if (rep_len < 0) { - fprintf(stderr, "nonfatal reply error: errno %d\n", - errno); + if (errno == EMSGSIZE) + fprintf(stderr, + "dropped truncated taskstats netlink message, please increase MAX_MSG_SIZE\n"); + else + fprintf(stderr, "nonfatal reply error: errno %d\n", + 
errno); continue; } if (msg.n.nlmsg_type == NLMSG_ERROR || @@ -562,6 +592,9 @@ int main(int argc, char *argv[]) printf("TGID\t%d\n", rtid); break; case TASKSTATS_TYPE_STATS: + PRINTF("version %u\n", + ((struct taskstats *) + NLA_DATA(na))->version); if (print_delays) print_delayacct((struct taskstats *) NLA_DATA(na)); if (print_io_accounting) diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c index e8dee05a62646..46e5986ad9277 100644 --- a/tools/accounting/procacct.c +++ b/tools/accounting/procacct.c @@ -71,7 +71,7 @@ int print_task_context_switch_counts; } /* Maximum size of response requested or message sent */ -#define MAX_MSG_SIZE 1024 +#define MAX_MSG_SIZE 2048 /* Maximum number of cpus expected to be specified in a cpumask */ #define MAX_CPUS 32 @@ -121,6 +121,32 @@ static int create_nl_socket(int protocol) return -1; } +static int recv_taskstats_msg(int sd, struct msgtemplate *msg) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + .iov_base = msg, + .iov_len = sizeof(*msg), + }; + struct msghdr hdr = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int ret; + + ret = recvmsg(sd, &hdr, 0); + if (ret < 0) + return -1; + if (hdr.msg_flags & MSG_TRUNC) { + errno = EMSGSIZE; + return -1; + } + + return ret; +} + static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, __u8 genl_cmd, __u16 nla_type, @@ -239,6 +265,8 @@ void handle_aggr(int mother, struct nlattr *na, int fd) PRINTF("TGID\t%d\n", rtid); break; case TASKSTATS_TYPE_STATS: + PRINTF("version %u\n", + ((struct taskstats *)NLA_DATA(na))->version); if (mother == TASKSTATS_TYPE_AGGR_PID) print_procacct((struct taskstats *) NLA_DATA(na)); if (fd) { @@ -347,12 +375,16 @@ int main(int argc, char *argv[]) } do { - rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + rep_len = recv_taskstats_msg(nl_sd, &msg); PRINTF("received %d bytes\n", rep_len); if (rep_len < 0) { - fprintf(stderr, "nonfatal reply error: errno %d\n", - errno); + 
if (errno == EMSGSIZE) + fprintf(stderr, + "dropped truncated taskstats netlink message, please increase MAX_MSG_SIZE\n"); + else + fprintf(stderr, "nonfatal reply error: errno %d\n", + errno); continue; } if (msg.n.nlmsg_type == NLMSG_ERROR || From d29cafc7e4ee9e28a150ba17e9a565ec5d881fbc Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 17:40:41 +0800 Subject: [PATCH 120/554] net: txgbe: fix RTNL assertion warning when remove module commit e159f05e12cc1111a3103b99375ddf0dfd0e7d63 upstream. For the copper NIC with external PHY, the driver called phylink_connect_phy() during probe and phylink_disconnect_phy() during remove. It caused an RTNL assertion warning in phylink_disconnect_phy() upon module remove. To fix this, add rtnl_lock() and rtnl_unlock() around the phylink_disconnect_phy() in remove function. ------------[ cut here ]------------ RTNL: assertion failed at drivers/net/phy/phylink.c (2351) WARNING: drivers/net/phy/phylink.c:2351 at phylink_disconnect_phy+0xd8/0xf0 [phylink], CPU#0: rmmod/4464 Modules linked in: ... CPU: 0 UID: 0 PID: 4464 Comm: rmmod Kdump: loaded Not tainted 7.0.0-rc4+ Hardware name: Micro-Star International Co., Ltd. 
MS-7E16/X670E GAMING PLUS WIFI (MS-7E16), BIOS 1.90 12/31/2024 RIP: 0010:phylink_disconnect_phy+0xe4/0xf0 [phylink] Code: 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 f6 31 ff e9 3a 38 8f e7 48 8d 3d 48 87 e2 ff ba 2f 09 00 00 48 c7 c6 c1 22 24 c0 <67> 48 0f b9 3a e9 34 ff ff ff 66 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffce7288363ac0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff89654b2a1a00 RCX: 0000000000000000 RDX: 000000000000092f RSI: ffffffffc02422c1 RDI: ffffffffc0239020 RBP: ffffce7288363ae8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8964c4022000 R13: ffff89654fce3028 R14: ffff89654ebb4000 R15: ffffffffc0226348 FS: 0000795e80d93780(0000) GS:ffff896c52857000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005b528b592000 CR3: 0000000170d0f000 CR4: 0000000000f50ef0 PKRU: 55555554 Call Trace: txgbe_remove_phy+0xbb/0xd0 [txgbe] txgbe_remove+0x4c/0xb0 [txgbe] pci_device_remove+0x41/0xb0 device_remove+0x43/0x80 device_release_driver_internal+0x206/0x270 driver_detach+0x4a/0xa0 bus_remove_driver+0x83/0x120 driver_unregister+0x2f/0x60 pci_unregister_driver+0x40/0x90 txgbe_driver_exit+0x10/0x850 [txgbe] __do_sys_delete_module.isra.0+0x1c3/0x2f0 __x64_sys_delete_module+0x12/0x20 x64_sys_call+0x20c3/0x2390 do_syscall_64+0x11c/0x1500 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_syscall_64+0x15a/0x1500 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_fault+0x312/0x580 ? srso_alias_return_thunk+0x5/0xfbef5 ? __handle_mm_fault+0x9d5/0x1040 ? srso_alias_return_thunk+0x5/0xfbef5 ? count_memcg_events+0x101/0x1d0 ? srso_alias_return_thunk+0x5/0xfbef5 ? handle_mm_fault+0x1e8/0x2f0 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_user_addr_fault+0x2f8/0x820 ? srso_alias_return_thunk+0x5/0xfbef5 ? irqentry_exit+0xb2/0x600 ? srso_alias_return_thunk+0x5/0xfbef5 ? 
exc_page_fault+0x92/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 02b2a6f91b90 ("net: txgbe: support copper NIC with external PHY") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/8B47A5872884147D+20260407094041.4646-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 03f1b9bc604d5..9b6f5cd3fdd7d 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -659,7 +659,9 @@ void txgbe_remove_phy(struct txgbe *txgbe) return; case wx_mac_sp: if (txgbe->wx->media_type == wx_media_copper) { + rtnl_lock(); phylink_disconnect_phy(txgbe->wx->phylink); + rtnl_unlock(); phylink_destroy(txgbe->wx->phylink); return; } From 7ef5b521785baf5ce8e0fd101f2a1d636ee0d199 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Tue, 27 Jan 2026 13:32:15 +0100 Subject: [PATCH 121/554] arm64: dts: marvell: uDPU: add ethernet aliases commit 38f09c97340cd23f976242e6cb1e7aa4c8ed28d0 upstream. On eDPU plus, which is an updated revision of eDPU which uses an external MV88E6361 switch we are relying on U-Boot to detect the board, and then enable and disable the required nodes for that revision. However, it seems that I missed adding the required aliases for ethernet controllers, and this worked as in OpenWrt we had added those locally. 
Cc: stable@vger.kernel.org Fixes: 660b8b2f3944 ("arm64: dts: marvell: eDPU: add support for version with external switch") Signed-off-by: Robert Marko Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi index 2428208457078..cd856c0aba71e 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi @@ -15,6 +15,11 @@ #include "armada-372x.dtsi" / { + aliases { + ethernet0 = ð0; + ethernet1 = ð1; + }; + chosen { stdout-path = "serial0:115200n8"; }; From 154fc7fe3f62c46891c3c4302f4b5b5391c932e6 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:14 +0530 Subject: [PATCH 122/554] net: qrtr: ns: Free the node during ctrl_cmd_bye() commit 68efba36446a7774ea5b971257ade049272a07ac upstream. A node sends the BYE packet when it is about to go down. So the nameserver should advertise the removal of the node to all remote and local observers and free the node finally. But currently, the nameserver doesn't free the node memory even after processing the BYE packet. This causes the node memory to leak. Hence, remove the node from Xarray list and free the node memory during both success and failure case of ctrl_cmd_bye(). 
Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-3-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index e5b3dac7836b1..2018a71a5c4c1 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -342,7 +342,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) struct qrtr_node *node; unsigned long index; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); @@ -357,8 +357,10 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); - if (!local_node) - return 0; + if (!local_node) { + ret = 0; + goto delete_node; + } memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); @@ -375,10 +377,18 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) { pr_err("failed to send bye cmd\n"); - return ret; + goto delete_node; } } - return 0; + + /* Ignore -ENODEV */ + ret = 0; + +delete_node: + xa_erase(&nodes, from->sq_node); + kfree(node); + + return ret; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, From 033370ffb3c9c0264d19f8ba9ef769523266589a Mon Sep 17 00:00:00 2001 From: Ao Zhou Date: Wed, 22 Apr 2026 22:52:07 +0800 Subject: [PATCH 123/554] net: rds: fix MR cleanup on copy error commit 8141a2dc70080eda1aedc0389ed2db2b292af5bd upstream. __rds_rdma_map() hands sg/pages ownership to the transport after get_mr() succeeds. If copying the generated cookie back to user space fails after that point, the error path must not free those resources again before dropping the MR reference. 
Remove the duplicate unpin/free from the put_user() failure branch so that MR teardown is handled only through the existing final cleanup path. Fixes: 0d4597c8c5ab ("net/rds: Track user mapped pages through special API") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ao Zhou Signed-off-by: Ren Wei Reviewed-by: Allison Henderson Link: https://patch.msgid.link/79c8ef73ec8e5844d71038983940cc2943099baf.1776764247.git.draw51280@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 00dbcd4d28e68..34d9333e4229f 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -326,10 +326,6 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { - if (!need_odp) { - unpin_user_pages(pages, nr_pages); - kfree(sg); - } ret = -EFAULT; goto out; } From 1c2afc4a1dd1d3f2a9b0e27bf2ada030e499c736 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 22 Apr 2026 15:18:37 +0800 Subject: [PATCH 124/554] net: txgbe: fix firmware version check commit c263f644add3d6ad81f9d62a99284fde408f0caa upstream. For the device SP, the firmware version is a 32-bit value where the lower 20 bits represent the base version number. And the customized firmware version populates the upper 12 bits with a specific identification number. For other devices AML 25G and 40G, the upper 12 bits of the firmware version is always non-zero, and they have other naming conventions. Only SP devices need to check this to tell if XPCS will work properly. So the judgement of MAC type is added here. And the original logic compared the entire 32-bit value against 0x20010, which caused the outdated base firmwares bypass the version check without a warning. 
Apply a mask 0xfffff to isolate the lower 20 bits for an accurate base version comparison. Fixes: ab928c24e6cd ("net: txgbe: add FW version warning") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Jacob Keller Link: https://patch.msgid.link/C787AA5C07598B13+20260422071837.372731-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index c4c4d70d8466a..1377ea90a8c28 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -867,7 +867,8 @@ static int txgbe_probe(struct pci_dev *pdev, "0x%08x", etrack_id); } - if (etrack_id < 0x20010) + if (wx->mac.type == wx_mac_sp && + ((etrack_id & 0xfffff) < 0x20010)) dev_warn(&pdev->dev, "Please upgrade the firmware to 0x20010 or above.\n"); err = txgbe_test_hostif(wx); From ea0b5d0fe96356dce38f98375a57c52a04e13712 Mon Sep 17 00:00:00 2001 From: Ruijie Li Date: Wed, 22 Apr 2026 23:40:18 +0800 Subject: [PATCH 125/554] net/smc: avoid early lgr access in smc_clc_wait_msg commit 5a8db80f721deee8e916c2cfdee78decda02ce4f upstream. A CLC decline can be received while the handshake is still in an early stage, before the connection has been associated with a link group. The decline handling in smc_clc_wait_msg() updates link-group level sync state for first-contact declines, but that state only exists after link group setup has completed. Guard the link-group update accordingly and keep the per-socket peer diagnosis handling unchanged. This preserves the existing sync_err handling for established link-group contexts and avoids touching link-group state before it is available. 
Fixes: 0cfdd8f92cac ("smc: connection and link group creation") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruijie Li Signed-off-by: Ren Wei Reviewed-by: Dust Li Link: https://patch.msgid.link/08c68a5c817acf198cce63d22517e232e8d60718.1776850759.git.ruijieli51@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_clc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 87c87edadde71..9772b466c72a4 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -788,8 +788,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, dclc = (struct smc_clc_msg_decline *)clcm; reason_code = SMC_CLC_DECL_PEERDECL; smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); - if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & - SMC_FIRST_CONTACT_MASK) { + if ((dclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK) && + smc->conn.lgr) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate_sched(smc->conn.lgr); } From be8aad558b4675f45b43080f81a9ffdeddea73a5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 16 Apr 2026 01:09:44 +0200 Subject: [PATCH 126/554] net: ks8851: Reinstate disabling of BHs around IRQ handler commit 5c9fcac3c872224316714d0d8914d9af16c76a6d upstream. If the driver executes ks8851_irq() AND a TX packet has been sent, then the driver enables TX queue via netif_wake_queue() which schedules TX softirq to queue packets for this device. If CONFIG_PREEMPT_RT=y is set AND a packet has also been received by the MAC, then ks8851_rx_pkts() calls netdev_alloc_skb_ip_align() to allocate SKBs for the received packets. If netdev_alloc_skb_ip_align() is called with BH enabled, then local_bh_enable() at the end of netdev_alloc_skb_ip_align() will trigger the pending softirq processing, which may ultimately call the .xmit callback ks8851_start_xmit_par(). 
The ks8851_start_xmit_par() will try to lock struct ks8851_net_par .lock spinlock, which is already locked by ks8851_irq() from which ks8851_start_xmit_par() was called. This leads to a deadlock, which is reported by the kernel, including a trace listed below. If CONFIG_PREEMPT_RT is not set, then since commit 0913ec336a6c0 ("net: ks8851: Fix deadlock with the SPI chip variant") the deadlock can also be triggered without received packet in the RX FIFO. The pending softirqs will be processed on return from spin_unlock_bh(&ks->statelock) in ks8851_irq(), which triggers the deadlock as well. Fix the problem by disabling BH around critical sections, including the IRQ handler, thus preventing the net_tx_action() softirq from triggering during these critical sections. The net_tx_action() softirq is triggered once BH are re-enabled and at the end of the IRQ handler, once all the other IRQ handler actions have been completed. __schedule from schedule_rtlock+0x1c/0x34 schedule_rtlock from rtlock_slowlock_locked+0x548/0x904 rtlock_slowlock_locked from rt_spin_lock+0x60/0x9c rt_spin_lock from ks8851_start_xmit_par+0x74/0x1a8 ks8851_start_xmit_par from netdev_start_xmit+0x20/0x44 netdev_start_xmit from dev_hard_start_xmit+0xd0/0x188 dev_hard_start_xmit from sch_direct_xmit+0xb8/0x25c sch_direct_xmit from __qdisc_run+0x1f8/0x4ec __qdisc_run from qdisc_run+0x1c/0x28 qdisc_run from net_tx_action+0x1f0/0x268 net_tx_action from handle_softirqs+0x1a4/0x270 handle_softirqs from __local_bh_enable_ip+0xcc/0xe0 __local_bh_enable_ip from __alloc_skb+0xd8/0x128 __alloc_skb from __netdev_alloc_skb+0x3c/0x19c __netdev_alloc_skb from ks8851_irq+0x388/0x4d4 ks8851_irq from irq_thread_fn+0x24/0x64 irq_thread_fn from irq_thread+0x178/0x28c irq_thread from kthread+0x12c/0x138 kthread from ret_from_fork+0x14/0x28 Reviewed-by: Sebastian Andrzej Siewior Fixes: e0863634bf9f ("net: ks8851: Queue RX packets in IRQ handler instead of disabling BHs") Cc: stable@vger.kernel.org Signed-off-by: Marek Vasut 
Link: https://patch.msgid.link/20260415231020.455298-1-marex@nabladev.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/micrel/ks8851.h | 6 +- drivers/net/ethernet/micrel/ks8851_common.c | 64 +++++++++------------ drivers/net/ethernet/micrel/ks8851_par.c | 15 ++--- drivers/net/ethernet/micrel/ks8851_spi.c | 11 ++-- 4 files changed, 38 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 31f75b4a67fd7..b795a3a605711 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -408,10 +408,8 @@ struct ks8851_net { struct gpio_desc *gpio; struct mii_bus *mii_bus; - void (*lock)(struct ks8851_net *ks, - unsigned long *flags); - void (*unlock)(struct ks8851_net *ks, - unsigned long *flags); + void (*lock)(struct ks8851_net *ks); + void (*unlock)(struct ks8851_net *ks); unsigned int (*rdreg16)(struct ks8851_net *ks, unsigned int reg); void (*wrreg16)(struct ks8851_net *ks, diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index bb5138806c3ff..51c28abdf664f 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -28,25 +28,23 @@ /** * ks8851_lock - register access lock * @ks: The chip state - * @flags: Spinlock flags * * Claim chip register access lock */ -static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags) +static void ks8851_lock(struct ks8851_net *ks) { - ks->lock(ks, flags); + ks->lock(ks); } /** * ks8851_unlock - register access unlock * @ks: The chip state - * @flags: Spinlock flags * * Release chip register access lock */ -static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags) +static void ks8851_unlock(struct ks8851_net *ks) { - ks->unlock(ks, flags); + ks->unlock(ks); } /** @@ -129,11 +127,10 @@ static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode) static 
int ks8851_write_mac_addr(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; u16 val; int i; - ks8851_lock(ks, &flags); + ks8851_lock(ks); /* * Wake up chip in case it was powered off when stopped; otherwise, @@ -149,7 +146,7 @@ static int ks8851_write_mac_addr(struct net_device *dev) if (!netif_running(dev)) ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); return 0; } @@ -163,12 +160,11 @@ static int ks8851_write_mac_addr(struct net_device *dev) static void ks8851_read_mac_addr(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; u8 addr[ETH_ALEN]; u16 reg; int i; - ks8851_lock(ks, &flags); + ks8851_lock(ks); for (i = 0; i < ETH_ALEN; i += 2) { reg = ks8851_rdreg16(ks, KS_MAR(i)); @@ -177,7 +173,7 @@ static void ks8851_read_mac_addr(struct net_device *dev) } eth_hw_addr_set(dev, addr); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); } /** @@ -312,11 +308,10 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; struct sk_buff_head rxq; - unsigned long flags; unsigned int status; struct sk_buff *skb; - ks8851_lock(ks, &flags); + ks8851_lock(ks); status = ks8851_rdreg16(ks, KS_ISR); ks8851_wrreg16(ks, KS_ISR, status); @@ -373,7 +368,7 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1); } - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); if (status & IRQ_LCI) mii_check_link(&ks->mii); @@ -405,7 +400,6 @@ static void ks8851_flush_tx_work(struct ks8851_net *ks) static int ks8851_net_open(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; int ret; ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, @@ -418,7 +412,7 @@ static int ks8851_net_open(struct net_device *dev) /* lock the card, even if we may not actually be doing anything * else at the moment */ - ks8851_lock(ks, &flags); + ks8851_lock(ks); netif_dbg(ks, ifup, ks->netdev, 
"opening\n"); @@ -471,7 +465,7 @@ static int ks8851_net_open(struct net_device *dev) netif_dbg(ks, ifup, ks->netdev, "network device up\n"); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); mii_check_link(&ks->mii); return 0; } @@ -487,23 +481,22 @@ static int ks8851_net_open(struct net_device *dev) static int ks8851_net_stop(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; netif_info(ks, ifdown, dev, "shutting down\n"); netif_stop_queue(dev); - ks8851_lock(ks, &flags); + ks8851_lock(ks); /* turn off the IRQs and ack any outstanding */ ks8851_wrreg16(ks, KS_IER, 0x0000); ks8851_wrreg16(ks, KS_ISR, 0xffff); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); /* stop any outstanding work */ ks8851_flush_tx_work(ks); flush_work(&ks->rxctrl_work); - ks8851_lock(ks, &flags); + ks8851_lock(ks); /* shutdown RX process */ ks8851_wrreg16(ks, KS_RXCR1, 0x0000); @@ -512,7 +505,7 @@ static int ks8851_net_stop(struct net_device *dev) /* set powermode to soft power down to save power */ ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); /* ensure any queued tx buffers are dumped */ while (!skb_queue_empty(&ks->txq)) { @@ -566,14 +559,13 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb, static void ks8851_rxctrl_work(struct work_struct *work) { struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work); - unsigned long flags; - ks8851_lock(ks, &flags); + ks8851_lock(ks); /* need to shutdown RXQ before modifying filter parameters */ ks8851_wrreg16(ks, KS_RXCR1, 0x00); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); } static void ks8851_set_rx_mode(struct net_device *dev) @@ -780,7 +772,6 @@ static int ks8851_set_eeprom(struct net_device *dev, { struct ks8851_net *ks = netdev_priv(dev); int offset = ee->offset; - unsigned long flags; int len = ee->len; u16 tmp; @@ -794,7 +785,7 @@ static int ks8851_set_eeprom(struct net_device *dev, if (!(ks->rc_ccr & CCR_EEPROM)) 
return -ENOENT; - ks8851_lock(ks, &flags); + ks8851_lock(ks); ks8851_eeprom_claim(ks); @@ -817,7 +808,7 @@ static int ks8851_set_eeprom(struct net_device *dev, eeprom_93cx6_wren(&ks->eeprom, false); ks8851_eeprom_release(ks); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); return 0; } @@ -827,7 +818,6 @@ static int ks8851_get_eeprom(struct net_device *dev, { struct ks8851_net *ks = netdev_priv(dev); int offset = ee->offset; - unsigned long flags; int len = ee->len; /* must be 2 byte aligned */ @@ -837,7 +827,7 @@ static int ks8851_get_eeprom(struct net_device *dev, if (!(ks->rc_ccr & CCR_EEPROM)) return -ENOENT; - ks8851_lock(ks, &flags); + ks8851_lock(ks); ks8851_eeprom_claim(ks); @@ -845,7 +835,7 @@ static int ks8851_get_eeprom(struct net_device *dev, eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2); ks8851_eeprom_release(ks); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); return 0; } @@ -904,7 +894,6 @@ static int ks8851_phy_reg(int reg) static int ks8851_phy_read_common(struct net_device *dev, int phy_addr, int reg) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; int result; int ksreg; @@ -912,9 +901,9 @@ static int ks8851_phy_read_common(struct net_device *dev, int phy_addr, int reg) if (ksreg < 0) return ksreg; - ks8851_lock(ks, &flags); + ks8851_lock(ks); result = ks8851_rdreg16(ks, ksreg); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); return result; } @@ -949,14 +938,13 @@ static void ks8851_phy_write(struct net_device *dev, int phy, int reg, int value) { struct ks8851_net *ks = netdev_priv(dev); - unsigned long flags; int ksreg; ksreg = ks8851_phy_reg(reg); if (ksreg >= 0) { - ks8851_lock(ks, &flags); + ks8851_lock(ks); ks8851_wrreg16(ks, ksreg, value); - ks8851_unlock(ks, &flags); + ks8851_unlock(ks); } } diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 78695be2570bf..9f1c33f6ddec0 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ 
b/drivers/net/ethernet/micrel/ks8851_par.c @@ -55,29 +55,27 @@ struct ks8851_net_par { /** * ks8851_lock_par - register access lock * @ks: The chip state - * @flags: Spinlock flags * * Claim chip register access lock */ -static void ks8851_lock_par(struct ks8851_net *ks, unsigned long *flags) +static void ks8851_lock_par(struct ks8851_net *ks) { struct ks8851_net_par *ksp = to_ks8851_par(ks); - spin_lock_irqsave(&ksp->lock, *flags); + spin_lock_bh(&ksp->lock); } /** * ks8851_unlock_par - register access unlock * @ks: The chip state - * @flags: Spinlock flags * * Release chip register access lock */ -static void ks8851_unlock_par(struct ks8851_net *ks, unsigned long *flags) +static void ks8851_unlock_par(struct ks8851_net *ks) { struct ks8851_net_par *ksp = to_ks8851_par(ks); - spin_unlock_irqrestore(&ksp->lock, *flags); + spin_unlock_bh(&ksp->lock); } /** @@ -233,7 +231,6 @@ static netdev_tx_t ks8851_start_xmit_par(struct sk_buff *skb, { struct ks8851_net *ks = netdev_priv(dev); netdev_tx_t ret = NETDEV_TX_OK; - unsigned long flags; unsigned int txqcr; u16 txmir; int err; @@ -241,7 +238,7 @@ static netdev_tx_t ks8851_start_xmit_par(struct sk_buff *skb, netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data); - ks8851_lock_par(ks, &flags); + ks8851_lock_par(ks); txmir = ks8851_rdreg16_par(ks, KS_TXMIR) & 0x1fff; @@ -262,7 +259,7 @@ static netdev_tx_t ks8851_start_xmit_par(struct sk_buff *skb, ret = NETDEV_TX_BUSY; } - ks8851_unlock_par(ks, &flags); + ks8851_unlock_par(ks); return ret; } diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index c862b13b447a5..28a7ff7644bb5 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -71,11 +71,10 @@ struct ks8851_net_spi { /** * ks8851_lock_spi - register access lock * @ks: The chip state - * @flags: Spinlock flags * * Claim chip register access lock */ -static void ks8851_lock_spi(struct 
ks8851_net *ks, unsigned long *flags) +static void ks8851_lock_spi(struct ks8851_net *ks) { struct ks8851_net_spi *kss = to_ks8851_spi(ks); @@ -85,11 +84,10 @@ static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags) /** * ks8851_unlock_spi - register access unlock * @ks: The chip state - * @flags: Spinlock flags * * Release chip register access lock */ -static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags) +static void ks8851_unlock_spi(struct ks8851_net *ks) { struct ks8851_net_spi *kss = to_ks8851_spi(ks); @@ -309,7 +307,6 @@ static void ks8851_tx_work(struct work_struct *work) struct ks8851_net_spi *kss; unsigned short tx_space; struct ks8851_net *ks; - unsigned long flags; struct sk_buff *txb; bool last; @@ -317,7 +314,7 @@ static void ks8851_tx_work(struct work_struct *work) ks = &kss->ks8851; last = skb_queue_empty(&ks->txq); - ks8851_lock_spi(ks, &flags); + ks8851_lock_spi(ks); while (!last) { txb = skb_dequeue(&ks->txq); @@ -343,7 +340,7 @@ static void ks8851_tx_work(struct work_struct *work) ks->tx_space = tx_space; spin_unlock_bh(&ks->statelock); - ks8851_unlock_spi(ks, &flags); + ks8851_unlock_spi(ks); } /** From 5424e678f9b304e148cf5dcc047cffc7a56a3bb5 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Mon, 13 Apr 2026 17:08:46 +0800 Subject: [PATCH 127/554] net: bridge: use a stable FDB dst snapshot in RCU readers commit df4601653201de21b487c3e7fffd464790cab808 upstream. Local FDB entries can be rewritten in place by `fdb_delete_local()`, which updates `f->dst` to another port or to `NULL` while keeping the entry alive. Several bridge RCU readers inspect `f->dst`, including `br_fdb_fillbuf()` through the `brforward_read()` sysfs path. These readers currently load `f->dst` multiple times and can therefore observe inconsistent values across the check and later dereference. 
In `br_fdb_fillbuf()`, this means a concurrent local-FDB update can change `f->dst` after the NULL check and before the `port_no` dereference, leading to a NULL-ptr-deref. Fix this by taking a single `READ_ONCE()` snapshot of `f->dst` in each affected RCU reader and using that snapshot for the rest of the access sequence. Also publish the in-place `f->dst` updates in `fdb_delete_local()` with `WRITE_ONCE()` so the readers and writer use matching access patterns. Fixes: 960b589f86c7 ("bridge: Properly check if local fdb entry can be deleted in br_fdb_change_mac_address") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/6570fabb85ecadb8baaf019efe856f407711c7b9.1776043229.git.zcliangcn@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_arp_nd_proxy.c | 8 +++++--- net/bridge/br_fdb.c | 28 ++++++++++++++++++---------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 6b5595868a39c..7ace0f4941bb6 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -202,11 +202,12 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, f = br_fdb_find_rcu(br, n->ha, vid); if (f) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); bool replied = false; if ((p && (p->flags & BR_PROXYARP)) || - (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) || - br_is_neigh_suppress_enabled(f->dst, vid)) { + (dst && (dst->flags & BR_PROXYARP_WIFI)) || + br_is_neigh_suppress_enabled(dst, vid)) { if (!vid) br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha, 0, 0); @@ -470,9 +471,10 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, f = br_fdb_find_rcu(br, n->ha, vid); if 
(f) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); bool replied = false; - if (br_is_neigh_suppress_enabled(f->dst, vid)) { + if (br_is_neigh_suppress_enabled(dst, vid)) { if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e2c17f620f009..6eb3ab69a5140 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -236,6 +236,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, const unsigned char *addr, __u16 vid) { + const struct net_bridge_port *dst; struct net_bridge_fdb_entry *f; struct net_device *dev = NULL; struct net_bridge *br; @@ -248,8 +249,11 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, br = netdev_priv(br_dev); rcu_read_lock(); f = br_fdb_find_rcu(br, addr, vid); - if (f && f->dst) - dev = f->dst->dev; + if (f) { + dst = READ_ONCE(f->dst); + if (dst) + dev = dst->dev; + } rcu_read_unlock(); return dev; @@ -346,7 +350,7 @@ static void fdb_delete_local(struct net_bridge *br, vg = nbp_vlan_group(op); if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && (!vid || br_vlan_find(vg, vid))) { - f->dst = op; + WRITE_ONCE(f->dst, op); clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } @@ -357,7 +361,7 @@ static void fdb_delete_local(struct net_bridge *br, /* Maybe bridge device has same hw addr? 
*/ if (p && ether_addr_equal(br->dev->dev_addr, addr) && (!vid || (v && br_vlan_should_use(v)))) { - f->dst = NULL; + WRITE_ONCE(f->dst, NULL); clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } @@ -928,6 +932,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long maxnum, unsigned long skip) { + const struct net_bridge_port *dst; struct net_bridge_fdb_entry *f; struct __fdb_entry *fe = buf; unsigned long delta; @@ -944,7 +949,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, continue; /* ignore pseudo entry for local MAC address */ - if (!f->dst) + dst = READ_ONCE(f->dst); + if (!dst) continue; if (skip) { @@ -956,8 +962,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); /* due to ABI compat need to split into hi/lo */ - fe->port_no = f->dst->port_no; - fe->port_hi = f->dst->port_no >> 8; + fe->port_no = dst->port_no; + fe->port_hi = dst->port_no >> 8; fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags); if (!test_bit(BR_FDB_STATIC, &f->flags)) { @@ -1083,9 +1089,11 @@ int br_fdb_dump(struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + const struct net_bridge_port *dst = READ_ONCE(f->dst); + if (*idx < ctx->fdb_idx) goto skip; - if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev && (!dst || dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; /* !f->dst is a special case for bridge @@ -1093,10 +1101,10 @@ int br_fdb_dump(struct sk_buff *skb, * Therefore need a little more filtering * we only want to dump the !f->dst case */ - if (f->dst) + if (dst) goto skip; } - if (!filter_dev && f->dst) + if (!filter_dev && dst) goto skip; err = fdb_fill_info(skb, br, f, From 20ba739bd62775a407a94fb1d78a7ce983bde636 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 20 Apr 2026 03:18:36 -0700 Subject: [PATCH 128/554] netconsole: avoid out-of-bounds 
access on empty string in trim_newline() commit 7079c8c13f2d33992bc846240517d88f4ab07781 upstream. trim_newline() unconditionally dereferences s[len - 1] after computing len = strnlen(s, maxlen). When the string is empty, len is 0 and the expression underflows to s[(size_t)-1], reading (and potentially writing) one byte before the buffer. The two callers feed trim_newline() with the result of strscpy() from configfs store callbacks (dev_name_store, userdatum_value_store). configfs guarantees count >= 1 reaches the callback, but the byte itself can be NUL: a userspace write(fd, "\0", 1) leaves the destination empty after strscpy() and triggers the underflow. The OOB write only fires if the adjacent byte happens to be '\n', so this is not a security issue, but the access is undefined behaviour either way. This pattern is commonly flagged by LLM-based code reviewers. While it is not a security fix, the underlying access is undefined behaviour and the change is small and self-contained, so it is a reasonable candidate for the stable trees. Guard the dereference on a non-zero length. 
Fixes: ae001dc67907 ("net: netconsole: move newline trimming to function") Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Reviewed-by: Gustavo Luiz Duarte Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260420-netcons_trim_newline-v1-1-dc35889aeedf@debian.org Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/netconsole.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 7288b25d6b8c4..7523f7763d36a 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -383,6 +383,8 @@ static void trim_newline(char *s, size_t maxlen) size_t len; len = strnlen(s, maxlen); + if (!len) + return; if (s[len - 1] == '\n') s[len - 1] = '\0'; } From f826f0000100511707f85eb74c01fb2973f6d27a Mon Sep 17 00:00:00 2001 From: Yuan Zhaoming Date: Fri, 17 Apr 2026 22:13:40 +0800 Subject: [PATCH 129/554] net: mctp: fix don't require received header reserved bits to be zero commit a663bac71a2f0b3ac6c373168ca57b2a6e6381aa upstream. From the MCTP Base specification (DSP0236 v1.2.1), the first byte of the MCTP header contains a 4 bit reserved field, and 4 bit version. On our current receive path, we require those 4 reserved bits to be zero, but the 9500-8i card is non-conformant, and may set these reserved bits. DSP0236 states that the reserved bits must be written as zero, and ignored when read. While the device might not conform to the former, we should accept these message to conform to the latter. Relax our check on the MCTP version byte to allow non-zero bits in the reserved field. 
Fixes: 889b7da23abf ("mctp: Add initial routing framework") Signed-off-by: Yuan Zhaoming Cc: stable@vger.kernel.org Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20260417141340.5306-1-yuanzhaoming901030@126.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/mctp.h | 3 +++ net/mctp/route.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index c3207ce98f07f..4c0e4655b6c7b 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -26,6 +26,9 @@ struct mctp_hdr { #define MCTP_VER_MIN 1 #define MCTP_VER_MAX 1 +/* Definitions for ver field */ +#define MCTP_HDR_VER_MASK GENMASK(3, 0) + /* Definitions for flags_seq_tag field */ #define MCTP_HDR_FLAG_SOM BIT(7) #define MCTP_HDR_FLAG_EOM BIT(6) diff --git a/net/mctp/route.c b/net/mctp/route.c index bee225c821ed8..d4fdaac8037ab 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -441,6 +441,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) unsigned long f; u8 tag, flags; int rc; + u8 ver; msk = NULL; rc = -EINVAL; @@ -467,7 +468,8 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) netid = mctp_cb(skb)->net; skb_pull(skb, sizeof(struct mctp_hdr)); - if (mh->ver != 1) + ver = mh->ver & MCTP_HDR_VER_MASK; + if (ver < MCTP_VER_MIN || ver > MCTP_VER_MAX) goto out; flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); @@ -1325,6 +1327,7 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, struct mctp_dst dst; struct mctp_hdr *mh; int rc; + u8 ver; rcu_read_lock(); mdev = __mctp_dev_get(dev); @@ -1342,7 +1345,8 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, /* We have enough for a header; decode and route */ mh = mctp_hdr(skb); - if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) + ver = mh->ver & MCTP_HDR_VER_MASK; + if (ver < MCTP_VER_MIN || ver > MCTP_VER_MAX) goto err_drop; /* source must be valid 
unicast or null; drop reserved ranges and From e9be7d2fb0b1c96540f0f61e17b9c7256c73f054 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 16 Apr 2026 01:09:45 +0200 Subject: [PATCH 130/554] net: ks8851: Avoid excess softirq scheduling commit 22230e68b2cf1ab6b027be8cf1198164a949c4fa upstream. The code injects a packet into netif_rx() repeatedly, which will add it to its internal NAPI and schedule a softirq, and process it. It is more efficient to queue multiple packets and process them all at the local_bh_enable() time. Reviewed-by: Sebastian Andrzej Siewior Fixes: e0863634bf9f ("net: ks8851: Queue RX packets in IRQ handler instead of disabling BHs") Cc: stable@vger.kernel.org Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260415231020.455298-2-marex@nabladev.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/micrel/ks8851_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 51c28abdf664f..0369e80b01bec 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -373,9 +373,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - if (status & IRQ_RXI) + if (status & IRQ_RXI) { + local_bh_disable(); while ((skb = __skb_dequeue(&rxq))) netif_rx(skb); + local_bh_enable(); + } return IRQ_HANDLED; } From 9eed57e9defdb9054eacb0411244255df0bd043c Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 2 Apr 2026 18:42:20 +0200 Subject: [PATCH 131/554] drm/arcpgu: fix device node leak commit ad3ac32a3893a2bbcad545efc005a8e4e7ecf10c upstream. This function gets a device_node reference via of_graph_get_remote_port_parent() and stores it in encoder_node, but never puts that reference. Add it. 
There used to be a of_node_put(encoder_node) but it has been removed by mistake during a rework in commit 3ea66a794fdc ("drm/arc: Inline arcpgu_drm_hdmi_init"). Fixes: 3ea66a794fdc ("drm/arc: Inline arcpgu_drm_hdmi_init") Cc: stable@vger.kernel.org Reviewed-by: Louis Chauvet Link: https://patch.msgid.link/20260402-drm-arcgpu-fix-device-node-leak-v2-1-d773cf754ae5@bootlin.com Signed-off-by: Luca Ceresoli Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tiny/arcpgu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c index 7cf0f0ea1bfe4..c74466ea2535e 100644 --- a/drivers/gpu/drm/tiny/arcpgu.c +++ b/drivers/gpu/drm/tiny/arcpgu.c @@ -250,7 +250,8 @@ DEFINE_DRM_GEM_DMA_FOPS(arcpgu_drm_ops); static int arcpgu_load(struct arcpgu_drm_private *arcpgu) { struct platform_device *pdev = to_platform_device(arcpgu->drm.dev); - struct device_node *encoder_node = NULL, *endpoint_node = NULL; + struct device_node *encoder_node __free(device_node) = NULL; + struct device_node *endpoint_node = NULL; struct drm_connector *connector = NULL; struct drm_device *drm = &arcpgu->drm; int ret; From 38387ccc0fbe38d14fb4c2ad7ee1d7404e5e59fd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 16 Apr 2026 15:25:07 +0200 Subject: [PATCH 132/554] slub: fix data loss and overflow in krealloc() commit 082a6d03a2d685a83a332666b500ad3966349588 upstream. Commit 2cd8231796b5 ("mm/slub: allow to set node and align in k[v]realloc") introduced the ability to force a reallocation if the original object does not satisfy new alignment or NUMA node, even when the object is being shrunk. This introduced two bugs in the reallocation fallback path: 1. Data loss during NUMA migration: The jump to 'alloc_new' happens before 'ks' and 'orig_size' are initialized. As a result, the memcpy() in the 'alloc_new' block would copy 0 bytes into the new allocation. 2. 
Buffer overflow during shrinking: When shrinking an object while forcing a new alignment, 'new_size' is smaller than the old size. However, the memcpy() used the old size ('orig_size ?: ks'), leading to an out-of-bounds write. The same overflow bug exists in the kvrealloc() fallback path, where the old bucket size ksize(p) is copied into the new buffer without being bounded by the new size. A simple reproducer: // e.g. add to lkdtm as KREALLOC_SHRINK_OVERFLOW while (1) { void *p = kmalloc(128, GFP_KERNEL); p = krealloc_node_align(p, 64, 256, GFP_KERNEL, NUMA_NO_NODE); kfree(p); } demonstrates the issue: ================================================================== BUG: KFENCE: out-of-bounds write in memcpy_orig+0x68/0x130 Out-of-bounds write at 0xffff8883ad757038 (120B right of kfence-#47): memcpy_orig+0x68/0x130 krealloc_node_align_noprof+0x1c8/0x340 lkdtm_KREALLOC_SHRINK_OVERFLOW+0x8c/0xc0 [lkdtm] lkdtm_do_action+0x3a/0x60 [lkdtm] ... kfence-#47: 0xffff8883ad756fc0-0xffff8883ad756fff, size=64, cache=kmalloc-64 allocated by task 316 on cpu 7 at 97.680481s (0.021813s ago): krealloc_node_align_noprof+0x19c/0x340 lkdtm_KREALLOC_SHRINK_OVERFLOW+0x8c/0xc0 [lkdtm] lkdtm_do_action+0x3a/0x60 [lkdtm] ... ================================================================== Fix it by moving the old size calculation to the top of __do_krealloc() and bounding all copy lengths by the new allocation size. 
Fixes: 2cd8231796b5 ("mm/slub: allow to set node and align in k[v]realloc") Cc: stable@vger.kernel.org Reported-by: https://sashiko.dev/#/patchset/20260415143735.2974230-1-elver%40google.com Signed-off-by: Marco Elver Link: https://patch.msgid.link/20260416132837.3787694-1-elver@google.com Reviewed-by: Harry Yoo (Oracle) Signed-off-by: Vlastimil Babka (SUSE) Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 5b038d1c8250e..27aa0e2975a5f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6999,16 +6999,6 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, if (!kasan_check_byte(p)) return NULL; - /* - * If reallocation is not necessary (e. g. the new size is less - * than the current allocated size), the current allocation will be - * preserved unless __GFP_THISNODE is set. In the latter case a new - * allocation on the requested node will be attempted. - */ - if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE && - nid != page_to_nid(virt_to_page(p))) - goto alloc_new; - if (is_kfence_address(p)) { ks = orig_size = kfence_ksize(p); } else { @@ -7027,6 +7017,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, } } + /* + * If reallocation is not necessary (e. g. the new size is less + * than the current allocated size), the current allocation will be + * preserved unless __GFP_THISNODE is set. In the latter case a new + * allocation on the requested node will be attempted. + */ + if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE && + nid != page_to_nid(virt_to_page(p))) + goto alloc_new; + /* If the old object doesn't fit, allocate a bigger one */ if (new_size > ks) goto alloc_new; @@ -7061,7 +7061,7 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, if (ret && p) { /* Disable KASAN checks as the object's redzone is accessed. 
*/ kasan_disable_current(); - memcpy(ret, kasan_reset_tag(p), orig_size ?: ks); + memcpy(ret, kasan_reset_tag(p), min(new_size, (size_t)(orig_size ?: ks))); kasan_enable_current(); } @@ -7288,7 +7288,7 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig if (p) { /* We already know that `p` is not a vmalloc address. */ kasan_disable_current(); - memcpy(n, kasan_reset_tag(p), ksize(p)); + memcpy(n, kasan_reset_tag(p), min(size, ksize(p))); kasan_enable_current(); kfree(p); From f344f04e33bfeb8655996f0c6c71e460e86d4319 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 20 Apr 2026 23:00:48 +0900 Subject: [PATCH 133/554] tracing/fprobe: Reject registration of a registered fprobe before init commit 6ad51ada17ed80c9a5f205b4c01c424cac8b0d46 upstream. Reject registration of a registered fprobe which is on the fprobe hash table before initializing fprobe. The add_fprobe_hash() checks this re-register fprobe, but since fprobe_init() clears hlist_array field, it is too late to check it. It has to check the re-registration before touching fprobe. 
Link: https://lore.kernel.org/all/177669364845.132053.18375367916162315835.stgit@mhiramat.tok.corp.google.com/ Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/fprobe.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 5a807d62e76dc..43b27f07730c2 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -4,6 +4,7 @@ */ #define pr_fmt(fmt) "fprobe: " fmt +#include #include #include #include @@ -98,7 +99,7 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node) } /* Check existence of the fprobe */ -static bool is_fprobe_still_exist(struct fprobe *fp) +static bool fprobe_registered(struct fprobe *fp) { struct hlist_head *head; struct fprobe_hlist *fph; @@ -111,7 +112,7 @@ static bool is_fprobe_still_exist(struct fprobe *fp) } return false; } -NOKPROBE_SYMBOL(is_fprobe_still_exist); +NOKPROBE_SYMBOL(fprobe_registered); static int add_fprobe_hash(struct fprobe *fp) { @@ -123,9 +124,6 @@ static int add_fprobe_hash(struct fprobe *fp) if (WARN_ON_ONCE(!fph)) return -EINVAL; - if (is_fprobe_still_exist(fp)) - return -EEXIST; - head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)]; hlist_add_head_rcu(&fp->hlist_array->hlist, head); return 0; @@ -140,7 +138,7 @@ static int del_fprobe_hash(struct fprobe *fp) if (WARN_ON_ONCE(!fph)) return -EINVAL; - if (!is_fprobe_still_exist(fp)) + if (!fprobe_registered(fp)) return -ENOENT; fph->fp = NULL; @@ -360,7 +358,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace, if (!fp) break; curr += FPROBE_HEADER_SIZE_IN_LONG; - if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) { + if (fprobe_registered(fp) && !fprobe_disabled(fp)) { if (WARN_ON_ONCE(curr + size > size_words)) break; fp->exit_handler(fp, trace->func, ret_ip, fregs, @@ -718,12 +716,14 @@ int 
register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num) struct fprobe_hlist *hlist_array; int ret, i; + guard(mutex)(&fprobe_mutex); + if (fprobe_registered(fp)) + return -EEXIST; + ret = fprobe_init(fp, addrs, num); if (ret) return ret; - mutex_lock(&fprobe_mutex); - hlist_array = fp->hlist_array; ret = fprobe_graph_add_ips(addrs, num); if (!ret) { @@ -731,7 +731,6 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num) for (i = 0; i < hlist_array->size; i++) insert_fprobe_node(&hlist_array->array[i]); } - mutex_unlock(&fprobe_mutex); if (ret) fprobe_fail_cleanup(fp); @@ -793,7 +792,7 @@ int unregister_fprobe(struct fprobe *fp) int ret = 0, i, count; mutex_lock(&fprobe_mutex); - if (!fp || !is_fprobe_still_exist(fp)) { + if (!fp || !fprobe_registered(fp)) { ret = -EINVAL; goto out; } From 9b924f3a26b21330a837cfe72e819b6393bbeeaa Mon Sep 17 00:00:00 2001 From: hkbinbin Date: Wed, 1 Apr 2026 12:19:07 +0000 Subject: [PATCH 134/554] RDMA/rxe: Validate pad and ICRC before payload_size() in rxe_rcv commit 7244491dab347f648e661da96dc0febadd9daec3 upstream. rxe_rcv() currently checks only that the incoming packet is at least header_size(pkt) bytes long before payload_size() is used. However, payload_size() subtracts both the attacker-controlled BTH pad field and RXE_ICRC_SIZE from pkt->paylen: payload_size = pkt->paylen - offset[RXE_PAYLOAD] - bth_pad(pkt) - RXE_ICRC_SIZE This means a short packet can still make payload_size() underflow even if it includes enough bytes for the fixed headers. Simply requiring header_size(pkt) + RXE_ICRC_SIZE is not sufficient either, because a packet with a forged non-zero BTH pad can still leave payload_size() negative and pass an underflowed value to later receive-path users. Fix this by validating pkt->paylen against the full minimum length required by payload_size(): header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE. 
Cc: stable@vger.kernel.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://patch.msgid.link/r/20260401121907.1468366-1-hkbinbinbin@gmail.com Signed-off-by: hkbinbin Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_recv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 5861e42440490..f79214738c2b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,7 +330,8 @@ void rxe_rcv(struct sk_buff *skb) pkt->qp = NULL; pkt->mask |= rxe_opcode[pkt->opcode].mask; - if (unlikely(skb->len < header_size(pkt))) + if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) + + RXE_ICRC_SIZE)) goto drop; err = hdr_check(pkt); From c2178ff1c70ebfc2ab9651b230c58a34683db759 Mon Sep 17 00:00:00 2001 From: Ruide Cao Date: Tue, 21 Apr 2026 12:16:31 +0800 Subject: [PATCH 135/554] ipv4: icmp: validate reply type before using icmp_pointers commit 67bf002a2d7387a6312138210d0bd06e3cf4879b upstream. Extended echo replies use ICMP_EXT_ECHOREPLY as the outbound reply type. That value is outside the range covered by icmp_pointers[], which only describes the traditional ICMP types up to NR_ICMP_TYPES. Avoid consulting icmp_pointers[] for reply types outside that range, and use array_index_nospec() for the remaining in-range lookup. Normal ICMP replies keep their existing behavior unchanged. 
Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruide Cao Signed-off-by: Ren Wei Reviewed-by: Simon Horman Link: https://patch.msgid.link/0dace90c01a5978e829ca741ef684dbd7304ce62.1776628519.git.caoruide123@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/icmp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 980aa17f3534d..fc0a93f43313d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -362,7 +363,9 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, to, len); skb->csum = csum_block_add(skb->csum, csum, odd); - if (icmp_pointers[icmp_param->data.icmph.type].error) + if (icmp_param->data.icmph.type <= NR_ICMP_TYPES && + icmp_pointers[array_index_nospec(icmp_param->data.icmph.type, + NR_ICMP_TYPES + 1)].error) nf_ct_attach(skb, icmp_param->skb); return 0; } From 016bc663657366d386993f63eb31072eb45a2b77 Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Wed, 18 Mar 2026 18:09:03 +0100 Subject: [PATCH 136/554] libceph: Prevent potential null-ptr-deref in ceph_handle_auth_reply() commit 5199c125d25aeae8615c4fc31652cc0fe624338e upstream. If a message of type CEPH_MSG_AUTH_REPLY contains a zero value for both protocol and result, this is currently not treated as an error. In case of ac->negotiating == true and ac->protocol > 0, this leads to setting ac->protocol = 0 and ac->ops = NULL. Thereafter, the check for ac->protocol != protocol returns false, and init_protocol() is not called. Subsequently, ac->ops->handle_reply() is called, which leads to a null pointer dereference, because ac->ops is still NULL. 
This patch changes the check for ac->protocol != protocol to !ac->protocol, as this also includes the case when the protocol was set to zero in the message. This causes the message to be treated as containing a bad auth protocol. Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 0d75679c6a7ed..23d109cb0c6b2 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -245,7 +245,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, ac->protocol = 0; ac->ops = NULL; } - if (ac->protocol != protocol) { + if (!ac->protocol) { ret = init_protocol(ac, protocol); if (ret) { pr_err("auth protocol '%s' init failed: %d\n", From 11baa8b24bcb07ae2048f2566a220021d766abe0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 17:49:06 +0200 Subject: [PATCH 137/554] spi: fix resource leaks on device setup failure commit db357034f7e0cf23f233f414a8508312dfe8fbbe upstream. Make sure to call controller cleanup() if spi_setup() fails while registering a device to avoid leaking any resources allocated by setup(). 
Fixes: c7299fea6769 ("spi: Fix spi device unregister flow") Cc: stable@vger.kernel.org # 5.13 Cc: Saravana Kannan Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410154907.129248-2-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 61 ++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 03b5d9a6d1ffd..ac4de4703a6f0 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -43,6 +43,8 @@ EXPORT_TRACEPOINT_SYMBOL(spi_transfer_stop); #include "internals.h" +static int __spi_setup(struct spi_device *spi, bool initial_setup); + static DEFINE_IDR(spi_controller_idr); static void spidev_release(struct device *dev) @@ -729,7 +731,7 @@ static int __spi_add_device(struct spi_device *spi) * normally rely on the device being setup. Devices * using SPI_CS_HIGH can't coexist well otherwise... */ - status = spi_setup(spi); + status = __spi_setup(spi, true); if (status < 0) { dev_err(dev, "can't setup %s, status %d\n", dev_name(&spi->dev), status); @@ -3854,27 +3856,7 @@ static int spi_set_cs_timing(struct spi_device *spi) return status; } -/** - * spi_setup - setup SPI mode and clock rate - * @spi: the device whose settings are being modified - * Context: can sleep, and no requests are queued to the device - * - * SPI protocol drivers may need to update the transfer mode if the - * device doesn't work with its default. They may likewise need - * to update clock rates or word sizes from initial values. This function - * changes those settings, and must be called from a context that can sleep. - * Except for SPI_CS_HIGH, which takes effect immediately, the changes take - * effect the next time the device is selected and data is transferred to - * or from it. When this function returns, the SPI device is deselected. 
- * - * Note that this call will fail if the protocol driver specifies an option - * that the underlying controller or its driver does not support. For - * example, not all hardware supports wire transfers using nine bit words, - * LSB-first wire encoding, or active-high chipselects. - * - * Return: zero on success, else a negative error code. - */ -int spi_setup(struct spi_device *spi) +static int __spi_setup(struct spi_device *spi, bool initial_setup) { unsigned bad_bits, ugly_bits; int status; @@ -3959,7 +3941,7 @@ int spi_setup(struct spi_device *spi) status = spi_set_cs_timing(spi); if (status) { mutex_unlock(&spi->controller->io_mutex); - return status; + goto err_cleanup; } if (spi->controller->auto_runtime_pm && spi->controller->set_cs) { @@ -3968,7 +3950,7 @@ int spi_setup(struct spi_device *spi) mutex_unlock(&spi->controller->io_mutex); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); - return status; + goto err_cleanup; } /* @@ -4004,6 +3986,37 @@ int spi_setup(struct spi_device *spi) status); return status; + +err_cleanup: + if (initial_setup) + spi_cleanup(spi); + + return status; +} + +/** + * spi_setup - setup SPI mode and clock rate + * @spi: the device whose settings are being modified + * Context: can sleep, and no requests are queued to the device + * + * SPI protocol drivers may need to update the transfer mode if the + * device doesn't work with its default. They may likewise need + * to update clock rates or word sizes from initial values. This function + * changes those settings, and must be called from a context that can sleep. + * Except for SPI_CS_HIGH, which takes effect immediately, the changes take + * effect the next time the device is selected and data is transferred to + * or from it. When this function returns, the SPI device is deselected. + * + * Note that this call will fail if the protocol driver specifies an option + * that the underlying controller or its driver does not support. 
For + * example, not all hardware supports wire transfers using nine bit words, + * LSB-first wire encoding, or active-high chipselects. + * + * Return: zero on success, else a negative error code. + */ +int spi_setup(struct spi_device *spi) +{ + return __spi_setup(spi, false); } EXPORT_SYMBOL_GPL(spi_setup); From 80f54d6f91426b9994e9074e8e167547859622f3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Mar 2026 18:19:15 -0700 Subject: [PATCH 138/554] extract-cert: Wrap key_pass with '#ifdef USE_PKCS11_ENGINE' commit 4f96b7c68a9904e01049ef610d701b382dca9574 upstream. A recent strengthening of -Wunused-but-set-variable (enabled with -Wall) in clang under a new subwarning, -Wunused-but-set-global, points out an unused static global variable in certs/extract-cert.c: certs/extract-cert.c:46:20: error: variable 'key_pass' set but not used [-Werror,-Wunused-but-set-global] 46 | static const char *key_pass; | ^ After commit 558bdc45dfb2 ("sign-file,extract-cert: use pkcs11 provider for OPENSSL MAJOR >= 3"), key_pass is only used with the OpenSSL engine API, not the new provider API. Wrap key_pass's declaration and assignment with '#ifdef USE_PKCS11_ENGINE' so that it is only included with its use to clear up the warning. While this is a little uglier than just marking key_pass with the unused attribute, this will make it easier to clean up all code associated with the use of the engine API if it were ever removed in the future. While in the area, use a tab for the key_pass assignment line to match the rest of the file. 
Cc: stable@vger.kernel.org Fixes: 558bdc45dfb2 ("sign-file,extract-cert: use pkcs11 provider for OPENSSL MAJOR >= 3") Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://patch.msgid.link/20260325-certs-extract-cert-key_pass-unused-but-set-global-v1-1-ecf94326d532@kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- certs/extract-cert.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/certs/extract-cert.c b/certs/extract-cert.c index 7d6d468ed6129..54ecd10242746 100644 --- a/certs/extract-cert.c +++ b/certs/extract-cert.c @@ -43,7 +43,9 @@ void format(void) exit(2); } +#ifdef USE_PKCS11_ENGINE static const char *key_pass; +#endif static BIO *wb; static char *cert_dst; static bool verbose; @@ -135,7 +137,9 @@ int main(int argc, char **argv) if (verbose_env && strchr(verbose_env, '1')) verbose = true; - key_pass = getenv("KBUILD_SIGN_PIN"); +#ifdef USE_PKCS11_ENGINE + key_pass = getenv("KBUILD_SIGN_PIN"); +#endif if (argc != 3) format(); From c5dfddc57f1b6b4e4df6610e917baceb114b8f6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2024 14:22:48 +0100 Subject: [PATCH 139/554] tpm: avoid -Wunused-but-set-variable commit 6f1d4d2ecfcd1b577dc87350ea965fe81f272e83 upstream. Outside of the EFI tpm code, the TPM_MEMREMAP()/TPM_MEMUNMAP functions are defined as trivial macros, leading to the mapping_size variable ending up unused: In file included from drivers/char/tpm/tpm-sysfs.c:16: In file included from drivers/char/tpm/tpm.h:28: include/linux/tpm_eventlog.h:167:6: error: variable 'mapping_size' set but not used [-Werror,-Wunused-but-set-variable] 167 | int mapping_size; Turn the stubs into inline functions to avoid this warning. 
Cc: stable@vger.kernel.org # v5.3+ Fixes: c46f3405692d ("tpm: Reserve the TPM final events table") Signed-off-by: Arnd Bergmann Reviewed-by: Thorsten Blum Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- include/linux/tpm_eventlog.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 891368e82558e..aff8ea2fa98e5 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -131,11 +131,16 @@ struct tcg_algorithm_info { }; #ifndef TPM_MEMREMAP -#define TPM_MEMREMAP(start, size) NULL +static inline void *TPM_MEMREMAP(unsigned long start, size_t size) +{ + return NULL; +} #endif #ifndef TPM_MEMUNMAP -#define TPM_MEMUNMAP(start, size) do{} while(0) +static inline void TPM_MEMUNMAP(void *mapping, size_t size) +{ +} #endif /** From 988eff645be4e54dc01060f9660b1415d59d550a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: [PATCH 140/554] LoongArch: Show CPU vulnerabilities correctly commit 37e57e8ad96cdec4a57b55fd10bef50f7370a954 upstream. Most LoongArch processors are vulnerable to Spectre-V1 Proof-of-Concept (PoC). And the generic mechanism, __user pointer sanitization, can be used as a mitigation. This means to use array_index_nospec() to prevent out of boundary access in syscall and other critical paths. Implement the arch-specific cpu_show_spectre_v1() to show CPU Spectre-V1 vulnerabilities correctly. 
Cc: stable@vger.kernel.org Link: https://cc-sw.com/chinese-loongarch-architecture-evaluation-part-3-of-3/ Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/cpu-probe.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index a2060a24b39fd..4ac9c95991670 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -387,3 +388,9 @@ void cpu_probe(void) cpu_report(); } + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n"); +} From bff7dbfc326a295b3f82eda9280bf020eb84461e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 20 Feb 2026 18:49:39 +0100 Subject: [PATCH 141/554] power: supply: axp288_charger: Do not cancel work before initializing it commit 658342fd75b582cbb06544d513171c3d645faead upstream. Driver registered devm handler to cancel_work_sync() before even the work was initialized, thus leading to possible warning from kernel/workqueue.c on (!work->func) check, if the error path was hit before the initialization happened. Use devm_work_autocancel() on each work item independently, which handles the initialization and handler to cancel work. 
Fixes: 165c2357744e ("power: supply: axp288_charger: Properly stop work on probe-error / remove") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Hans de Goede Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20260220174938.672883-5-krzysztof.kozlowski@oss.qualcomm.com Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/axp288_charger.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index ac05942e4e6ac..ca52c2c82b2cf 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -821,14 +822,6 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) return 0; } -static void axp288_charger_cancel_work(void *data) -{ - struct axp288_chrg_info *info = data; - - cancel_work_sync(&info->otg.work); - cancel_work_sync(&info->cable.work); -} - static int axp288_charger_probe(struct platform_device *pdev) { int ret, i, pirq; @@ -911,12 +904,12 @@ static int axp288_charger_probe(struct platform_device *pdev) } /* Cancel our work on cleanup, register this before the notifiers */ - ret = devm_add_action(dev, axp288_charger_cancel_work, info); + ret = devm_work_autocancel(dev, &info->cable.work, + axp288_charger_extcon_evt_worker); if (ret) return ret; /* Register for extcon notification */ - INIT_WORK(&info->cable.work, axp288_charger_extcon_evt_worker); info->cable.nb.notifier_call = axp288_charger_handle_cable_evt; ret = devm_extcon_register_notifier_all(dev, info->cable.edev, &info->cable.nb); @@ -926,8 +919,12 @@ static int axp288_charger_probe(struct platform_device *pdev) } schedule_work(&info->cable.work); + ret = devm_work_autocancel(dev, &info->otg.work, + axp288_charger_otg_evt_worker); + if (ret) + return ret; + /* Register for OTG notification */ - 
INIT_WORK(&info->otg.work, axp288_charger_otg_evt_worker); info->otg.id_nb.notifier_call = axp288_charger_handle_otg_evt; if (info->otg.cable) { ret = devm_extcon_register_notifier(dev, info->otg.cable, From b69a8729f84a8f3a471cbd331957c27675ede140 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Fri, 10 Apr 2026 00:26:19 +0000 Subject: [PATCH 142/554] hwmon: (isl28022) Fix integer overflow in power calculation on 32-bit commit a7c0aaa50e40ffd8fd703d006d5a04b540b9ca92 upstream. isl28022_read_power() computes: *val = ((51200000L * ((long)data->gain)) / (long)data->shunt) * (long)regval; On 32-bit platforms, 'long' is 32 bits. With gain=8 and shunt=10000 (the default configuration): (51200000 * 8) / 10000 = 40960 40960 * 65535 = 2,684,313,600 This exceeds LONG_MAX (2,147,483,647), resulting in signed integer overflow. Additionally, dividing before multiplying by regval loses precision unnecessarily. Use u64 arithmetic with div_u64() and multiply before dividing to retain precision. The intermediate product cannot overflow u64 (worst case: 51200000 * 8 * 65535 = 26843136000000). Power is inherently non-negative, so unsigned types are the natural fit. Cap the result to LONG_MAX before returning it through the hwmon callback. 
Fixes: 39671a14df4f2 ("hwmon: (isl28022) new driver for ISL28022 power monitor") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260410002613.424557-1-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/isl28022.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/isl28022.c b/drivers/hwmon/isl28022.c index c2e559dde63f6..c5a34ceedcdb2 100644 --- a/drivers/hwmon/isl28022.c +++ b/drivers/hwmon/isl28022.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -185,8 +186,8 @@ static int isl28022_read_power(struct device *dev, u32 attr, long *val) ISL28022_REG_POWER, ®val); if (err < 0) return err; - *val = ((51200000L * ((long)data->gain)) / - (long)data->shunt) * (long)regval; + *val = min(div_u64(51200000ULL * data->gain * regval, + data->shunt), LONG_MAX); break; default: return -EOPNOTSUPP; From b3166d1657534a6cc598adc8745d9b9a96b81e90 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 30 Mar 2026 10:27:51 +0200 Subject: [PATCH 143/554] fs: prepare for adding LSM blob to backing_file commit 880bd496ec72a6dcb00cb70c430ef752ba242ae7 upstream. In preparation to adding LSM blob to backing_file struct, factor out helpers init_backing_file() and backing_file_free(). 
Cc: stable@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-unionfs@vger.kernel.org Cc: linux-erofs@lists.ozlabs.org Signed-off-by: Amir Goldstein Reviewed-by: Serge Hallyn [PM: use the term "LSM blob", fix comment style to match file] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- fs/file_table.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 34244fccf2edf..762f03dcbcd77 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -66,6 +66,12 @@ void backing_file_set_user_path(struct file *f, const struct path *path) } EXPORT_SYMBOL_GPL(backing_file_set_user_path); +static inline void backing_file_free(struct backing_file *ff) +{ + path_put(&ff->user_path); + kmem_cache_free(bfilp_cachep, ff); +} + static inline void file_free(struct file *f) { security_file_free(f); @@ -73,8 +79,7 @@ static inline void file_free(struct file *f) percpu_counter_dec(&nr_files); put_cred(f->f_cred); if (unlikely(f->f_mode & FMODE_BACKING)) { - path_put(backing_file_user_path(f)); - kmem_cache_free(bfilp_cachep, backing_file(f)); + backing_file_free(backing_file(f)); } else { kmem_cache_free(filp_cachep, f); } @@ -283,6 +288,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) return f; } +static int init_backing_file(struct backing_file *ff) +{ + memset(&ff->user_path, 0, sizeof(ff->user_path)); + return 0; +} + /* * Variant of alloc_empty_file() that allocates a backing_file container * and doesn't check and modify nr_files. @@ -305,7 +316,14 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred) return ERR_PTR(error); } + /* The f_mode flags must be set before fput(). 
*/ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; + error = init_backing_file(ff); + if (unlikely(error)) { + fput(&ff->file); + return ERR_PTR(error); + } + return &ff->file; } From 77eb3e79a3e75b1dff8d8a8a6772a7d0ceaeccb5 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 16 Oct 2025 14:51:17 +0800 Subject: [PATCH 144/554] drm/amd: Fix set but not used warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 46791d147d3ab3262298478106ef2a52fc7192e2 upstream. There are many set but not used warnings under drivers/gpu/drm/amd when compiling with the latest upstream mainline GCC: drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:305:18: warning: variable ‘p’ set but not used [-Wunused-but-set-variable=] drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h:103:26: warning: variable ‘internal_reg_offset’ set but not used [-Wunused-but-set-variable=] ... drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h:164:26: warning: variable ‘internal_reg_offset’ set but not used [-Wunused-but-set-variable=] ... drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:445:13: warning: variable ‘pipe_idx’ set but not used [-Wunused-but-set-variable=] drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:875:21: warning: variable ‘pipe_idx’ set but not used [-Wunused-but-set-variable=] Remove the variables actually not used or add __maybe_unused attribute for the variables actually used to fix them, compile tested only. 
Signed-off-by: Tiezhu Yang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 6 ++++-- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 9 +++------ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index b2033f8352f50..83f3b94ed975a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -302,7 +302,6 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; - unsigned p; int i, j; u64 page_base; /* Starting from VEGA10, system bit must be 0 to mean invalid. */ @@ -316,8 +315,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, return; t = offset / AMDGPU_GPU_PAGE_SIZE; - p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; - for (i = 0; i < pages; i++, p++) { + for (i = 0; i < pages; i++) { page_base = adev->dummy_page_addr; if (!adev->gart.ptr) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index dc8a17bcc3c8d..82624b44e661a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -100,7 +100,8 @@ #define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ - uint32_t internal_reg_offset, addr; \ + /* To avoid a -Wunused-but-set-variable warning. */ \ + uint32_t internal_reg_offset __maybe_unused, addr; \ bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ @@ -161,7 +162,8 @@ #define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ - uint32_t internal_reg_offset, addr; \ + /* To avoid a -Wunused-but-set-variable warning. 
*/ \ + uint32_t internal_reg_offset __maybe_unused, addr; \ bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 53a088ebddefe..6518d5639d66a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -442,7 +442,6 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru int i = 0, k = 0; int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it. uint8_t visual_confirm_enabled; - int pipe_idx = 0; struct dc_stream_status *stream_status = NULL; if (dc == NULL) @@ -457,7 +456,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.visual_confirm_enabled = visual_confirm_enabled; if (should_manage_pstate) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->stream) @@ -472,7 +471,6 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us; break; } - pipe_idx++; } } @@ -872,7 +870,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, bool enable) { uint8_t cmd_pipe_index = 0; - uint32_t i, pipe_idx; + uint32_t i; uint8_t subvp_count = 0; union dmub_rb_cmd cmd; struct pipe_ctx *subvp_pipes[2]; @@ -899,7 +897,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, if (enable) { // For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; pipe_mall_type = 
dc_state_get_pipe_subvp_type(context, pipe); @@ -922,7 +920,6 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); } - pipe_idx++; } if (subvp_count == 2) { update_subvp_prefetch_end_to_mall_start(dc, context, &cmd, subvp_pipes); From 6b11dfb3c5173ef7ce69fe48cdeb23d3118464a0 Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Mon, 22 Dec 2025 00:25:31 +0530 Subject: [PATCH 145/554] ASoC: Intel: avs: replace strcmp with sysfs_streq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 45e9066f3a487e9e26b842644364d045af054775 upstream. allmodconfig failes to build with GCC 16 with the following build error sound/soc/intel/avs/path.c:137:38: error: ‘strcmp’ reading 1 or more bytes from a region of size 0 [-Werror=stringop-overread] 137 | return id->id == id2->id && !strcmp(id->tplg_name, id2->tplg_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ‘avs_condpaths_walk’: events 1-3 137 | return id->id == id2->id && !strcmp(id->tplg_name, id2->tplg_name); | ~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | | | (3) warning happens here | (1) when the condition is evaluated to true ...... 155 | if (id->id != path->template->owner->id || | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | (2) when the condition is evaluated to false 156 | strcmp(id->tplg_name, path->template->owner->owner->name)) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from sound/soc/intel/avs/path.h:14, from sound/soc/intel/avs/path.c:15: sound/soc/intel/avs/topology.h: In function ‘avs_condpaths_walk’: sound/soc/intel/avs/topology.h:152:13: note: at offset 4 into source object ‘id’ of size 4 152 | u32 id; | ^~ Using the sysfs_streq as an alternative to strcmp helps getting around this build failure. 
Please also refer https://docs.kernel.org/core-api/kernel-api.html#c.__sysfs_match_string Signed-off-by: Brahmajit Das Link: https://patch.msgid.link/20251221185531.6453-1-listout@listout.xyz Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/avs/path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index 7aa20fcf1a33b..0c66c8bb2f89d 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -134,7 +134,7 @@ static struct avs_tplg_path *avs_condpath_find_variant(struct avs_dev *adev, static bool avs_tplg_path_template_id_equal(struct avs_tplg_path_template_id *id, struct avs_tplg_path_template_id *id2) { - return id->id == id2->id && !strcmp(id->tplg_name, id2->tplg_name); + return id->id == id2->id && !sysfs_streq(id->tplg_name, id2->tplg_name); } static struct avs_path *avs_condpath_find_match(struct avs_dev *adev, From 95d48e37a1304d6148406c799479c0fb505aefa7 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Fri, 10 Apr 2026 00:25:55 +0000 Subject: [PATCH 146/554] hwmon: (pt5161l) Fix bugs in pt5161l_read_block_data() commit 24c73e93d6a756e1b8626bb259d2e07c5b89b370 upstream. Fix two bugs in pt5161l_read_block_data(): 1. Buffer overrun: The local buffer rbuf is declared as u8 rbuf[24], but i2c_smbus_read_block_data() can return up to I2C_SMBUS_BLOCK_MAX (32) bytes. The i2c-core copies the data into the caller's buffer before the return value can be checked, so the post-read length validation does not prevent a stack overrun if a device returns more than 24 bytes. Resize the buffer to I2C_SMBUS_BLOCK_MAX. 2. Unexpected positive return on length mismatch: When all three retries are exhausted because the device returns data with an unexpected length, i2c_smbus_read_block_data() returns a positive byte count. The function returns this directly, and callers treat any non-negative return as success, processing stale or incomplete buffer contents. 
Return -EIO when retries are exhausted with a positive return value, preserving the negative error code on I2C failure. Fixes: 1b2ca93cd0592 ("hwmon: Add driver for Astera Labs PT5161L retimer") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260410002549.424162-1-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pt5161l.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pt5161l.c b/drivers/hwmon/pt5161l.c index 20e3cfa625f17..89d4da8aa4c09 100644 --- a/drivers/hwmon/pt5161l.c +++ b/drivers/hwmon/pt5161l.c @@ -121,7 +121,7 @@ static int pt5161l_read_block_data(struct pt5161l_data *data, u32 address, int ret, tries; u8 remain_len = len; u8 curr_len; - u8 wbuf[16], rbuf[24]; + u8 wbuf[16], rbuf[I2C_SMBUS_BLOCK_MAX]; u8 cmd = 0x08; /* [7]:pec_en, [4:2]:func, [1]:start, [0]:end */ u8 config = 0x00; /* [6]:cfg_type, [4:1]:burst_len, [0]:address bit16 */ @@ -151,7 +151,7 @@ static int pt5161l_read_block_data(struct pt5161l_data *data, u32 address, break; } if (tries >= 3) - return ret; + return ret < 0 ? ret : -EIO; memcpy(val, rbuf, curr_len); val += curr_len; From d780f24a4939862e1df9def415fe7de59280fc7c Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 3 Mar 2026 15:08:38 +0000 Subject: [PATCH 147/554] randomize_kstack: Maintain kstack_offset per task commit 37beb42560165869838e7d91724f3e629db64129 upstream. kstack_offset was previously maintained per-cpu, but this caused a couple of issues. So let's instead make it per-task. Issue 1: add_random_kstack_offset() and choose_random_kstack_offset() expected and required to be called with interrupts and preemption disabled so that it could manipulate per-cpu state. But arm64, loongarch and risc-v are calling them with interrupts and preemption enabled. 
I don't _think_ this causes any functional issues, but it's certainly unexpected and could lead to manipulating the wrong cpu's state, which could cause a minor performance degradation due to bouncing the cache lines. By maintaining the state per-task those functions can safely be called in preemptible context. Issue 2: add_random_kstack_offset() is called before executing the syscall and expands the stack using a previously chosen random offset. choose_random_kstack_offset() is called after executing the syscall and chooses and stores a new random offset for the next syscall. With per-cpu storage for this offset, an attacker could force cpu migration during the execution of the syscall and prevent the offset from being updated for the original cpu such that it is predictable for the next syscall on that cpu. By maintaining the state per-task, this problem goes away because the per-task random offset is updated after the syscall regardless of which cpu it is executing on. Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/ Cc: stable@vger.kernel.org Acked-by: Mark Rutland Signed-off-by: Ryan Roberts Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/randomize_kstack.h | 26 +++++++++++++++----------- include/linux/sched.h | 4 ++++ init/main.c | 1 - kernel/fork.c | 2 ++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 1d982dbdd0d0b..5d3916ca747cc 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -9,7 +9,6 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset); -DECLARE_PER_CPU(u32, kstack_offset); /* * Do not use this anywhere else in the kernel. 
This is used here because @@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset); * add_random_kstack_offset - Increase stack utilization by previously * chosen random offset * - * This should be used in the syscall entry path when interrupts and - * preempt are disabled, and after user registers have been stored to - * the stack. For testing the resulting entropy, please see: - * tools/testing/selftests/lkdtm/stack-entropy.sh + * This should be used in the syscall entry path after user registers have been + * stored to the stack. Preemption may be enabled. For testing the resulting + * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh */ #define add_random_kstack_offset() do { \ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ - u32 offset = raw_cpu_read(kstack_offset); \ + u32 offset = current->kstack_offset; \ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ @@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset); * choose_random_kstack_offset - Choose the random offset for the next * add_random_kstack_offset() * - * This should only be used during syscall exit when interrupts and - * preempt are disabled. This position in the syscall flow is done to - * frustrate attacks from userspace attempting to learn the next offset: + * This should only be used during syscall exit. Preemption may be enabled. This + * position in the syscall flow is done to frustrate attacks from userspace + * attempting to learn the next offset: * - Maximize the timing uncertainty visible from userspace: if the * offset is chosen at syscall entry, userspace has much more control * over the timing between choosing offsets. 
"How long will we be in @@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset); #define choose_random_kstack_offset(rand) do { \ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ - u32 offset = raw_cpu_read(kstack_offset); \ + u32 offset = current->kstack_offset; \ offset = ror32(offset, 5) ^ (rand); \ - raw_cpu_write(kstack_offset, offset); \ + current->kstack_offset = offset; \ } \ } while (0) + +static inline void random_kstack_task_init(struct task_struct *tsk) +{ + tsk->kstack_offset = 0; +} #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #define add_random_kstack_offset() do { } while (0) #define choose_random_kstack_offset(rand) do { } while (0) +#define random_kstack_task_init(tsk) do { } while (0) #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3e2005e9e2f0b..2a540a9065dea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1614,6 +1614,10 @@ struct task_struct { unsigned long prev_lowest_stack; #endif +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET + u32 kstack_offset; +#endif + #ifdef CONFIG_X86_MCE void __user *mce_vaddr; __u64 mce_kflags; diff --git a/init/main.c b/init/main.c index 07a3116811c5d..048a625382429 100644 --- a/init/main.c +++ b/init/main.c @@ -830,7 +830,6 @@ static inline void initcall_debug_enable(void) #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset); -DEFINE_PER_CPU(u32, kstack_offset); static int __init early_randomize_kstack_offset(char *buf) { diff --git a/kernel/fork.c b/kernel/fork.c index 924a9e10106b3..34e6b94c22129 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include @@ -2191,6 +2192,7 @@ __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cleanup_io; + random_kstack_task_init(p); stackleak_task_init(p); if (pid != &init_struct_pid) 
{ From d73c3a4070dce0f86a64415c00ac090cf462be5d Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 25 Mar 2026 08:49:47 -0500 Subject: [PATCH 148/554] mmc: block: use single block write in retry commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream. Due to errata i2493[0], multi-block write would still fail in retries. With i2493, the MMC interface has the potential of write failures when issuing multi-block writes operating in HS200 mode with excessive IO supply noise. While the errata provides guidance in hardware design and layout to minimize the IO supply noise, in theory the write failure cannot be resolved in hardware. The software solution to ensure the data integrity is to add minimum 5us delay between block writes. Single-block write is the practical way to introduce the delay. This patch reuses recovery_mode flag, and switches to single-block write in retry when multi-block write fails. It covers both CQE and non-CQE cases. [0] https://www.ti.com/lit/pdf/sprz582 Cc: stable@vger.kernel.org Suggested-by: Jens Axboe Signed-off-by: Bin Liu Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 12 ++++++++++-- drivers/mmc/core/queue.h | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index c0ffe0817fd4f..6113bb927946d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1404,6 +1404,9 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, rq_data_dir(req) == WRITE && (md->flags & MMC_BLK_REL_WR); + if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK) + recovery_mode = 1; + memset(brq, 0, sizeof(struct mmc_blk_request)); mmc_crypto_prepare_req(mqrq); @@ -1543,10 +1546,13 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) err = 0; if (err) { - if (mqrq->retries++ < MMC_CQE_RETRIES) + if (mqrq->retries++ < MMC_CQE_RETRIES) { + if (rq_data_dir(req) == WRITE) + mqrq->flags |= 
MQRQ_XFER_SINGLE_BLOCK; blk_mq_requeue_request(req, true); - else + } else { blk_mq_end_request(req, BLK_STS_IOERR); + } } else if (mrq->data) { if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered)) blk_mq_requeue_request(req, true); @@ -2088,6 +2094,8 @@ static void mmc_blk_mq_complete_rq(struct mmc_queue *mq, struct request *req) } else if (!blk_rq_bytes(req)) { __blk_mq_end_request(req, BLK_STS_IOERR); } else if (mqrq->retries++ < MMC_MAX_RETRIES) { + if (rq_data_dir(req) == WRITE) + mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK; blk_mq_requeue_request(req, true); } else { if (mmc_card_removed(mq->card)) diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 1498840a4ea00..c254e6580afd6 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -61,6 +61,8 @@ enum mmc_drv_op { MMC_DRV_OP_GET_EXT_CSD, }; +#define MQRQ_XFER_SINGLE_BLOCK BIT(0) + struct mmc_queue_req { struct mmc_blk_request brq; struct scatterlist *sg; @@ -69,6 +71,7 @@ struct mmc_queue_req { void *drv_op_data; unsigned int ioc_count; int retries; + u32 flags; }; struct mmc_queue { From 52e1a80bacc66863aca1c3a9ef0dc2da76ca42b3 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 8 Apr 2026 15:18:49 +0800 Subject: [PATCH 149/554] mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration commit 6546a49bbe656981d99a389195560999058c89c4 upstream. According to the ASIC design recommendations, the clock must be disabled before operating the DLL to prevent glitches that could affect the internal digital logic. In extreme cases, failing to do so may cause the controller to malfunction completely. Adds a step to disable the clock before DLL configuration and re-enables it at the end. 
Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support") Cc: stable@vger.kernel.org Signed-off-by: Shawn Lin Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-dwcmshc.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 5ae4e11b1e90c..5b7ffc359414a 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -607,12 +607,15 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock extra &= ~BIT(0); sdhci_writel(host, extra, reg); + /* Disable clock while config DLL */ + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); + if (clock <= 52000000) { if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 || host->mmc->ios.timing == MMC_TIMING_MMC_HS400) { dev_err(mmc_dev(host->mmc), "Can't reduce the clock below 52MHz in HS200/HS400 mode"); - return; + goto enable_clk; } /* @@ -632,7 +635,7 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock DLL_STRBIN_DELAY_NUM_SEL | DLL_STRBIN_DELAY_NUM_DEFAULT << DLL_STRBIN_DELAY_NUM_OFFSET; sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN); - return; + goto enable_clk; } /* Reset DLL */ @@ -659,7 +662,7 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock 500 * USEC_PER_MSEC); if (err) { dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n"); - return; + goto enable_clk; } extra = 0x1 << 16 | /* tune clock stop en */ @@ -692,6 +695,16 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock DLL_STRBIN_TAPNUM_DEFAULT | DLL_STRBIN_TAPNUM_FROM_SW; sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN); + +enable_clk: + /* + * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional + * on Rockchip's SDHCI implementation. 
Instead, the clock frequency is fully + * controlled via external clk provider by calling clk_set_rate(). Consequently, + * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock, + * which matches the hardware's actual behavior. + */ + sdhci_enable_clk(host, 0); } static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask) From 6d8087a7d0f342dfeb341dc75e10009613b10753 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 20 Mar 2026 08:30:30 +0100 Subject: [PATCH 150/554] arm64: dts: ti: am62-verdin: Enable pullup for eMMC data pins commit d5325810814ee995debfa0b6c4a22e0391598bef upstream. Verdin AM62 board does not have external pullups on eMMC DAT1-DAT7 pins. Enable internal pullups on DAT1-DAT7 considering: - without a host-side pullup, these lines rely solely on the eMMC device's internal pullup (R_int, 10kohm-150kohm per JEDEC), which may exceed the recommended 50kohm max for 1.8V VCCQ - JEDEC JESD84-B51 Table 200 requires host-side pullups (R_DAT, 10kohm-100kohm) on all data lines to prevent bus floating Fixes: 316b80246b16 ("arm64: dts: ti: add verdin am62") Cc: stable@vger.kernel.org Signed-off-by: Francesco Dolcini Link: https://patch.msgid.link/20260320073032.10427-1-francesco@dolcini.it Signed-off-by: Vignesh Raghavendra Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index dc4b228a9fd7e..796f2cedf5d62 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -572,16 +572,16 @@ /* On-module eMMC */ pinctrl_sdhci0: main-mmc0-default-pins { pinctrl-single,pins = < - AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */ - AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */ - AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */ - AM62X_IOPAD(0x210, PIN_INPUT, 0) /* 
(AA1) MMC0_DAT1 */ - AM62X_IOPAD(0x20c, PIN_INPUT, 0) /* (AA3) MMC0_DAT2 */ - AM62X_IOPAD(0x208, PIN_INPUT, 0) /* (Y4) MMC0_DAT3 */ - AM62X_IOPAD(0x204, PIN_INPUT, 0) /* (AB2) MMC0_DAT4 */ - AM62X_IOPAD(0x200, PIN_INPUT, 0) /* (AC1) MMC0_DAT5 */ - AM62X_IOPAD(0x1fc, PIN_INPUT, 0) /* (AD2) MMC0_DAT6 */ - AM62X_IOPAD(0x1f8, PIN_INPUT, 0) /* (AC2) MMC0_DAT7 */ + AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */ + AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */ + AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */ + AM62X_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */ + AM62X_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */ + AM62X_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /* (Y4) MMC0_DAT3 */ + AM62X_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */ + AM62X_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */ + AM62X_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */ + AM62X_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */ >; }; From 27f561bf894e46bdc2d6209c50884adad79d8277 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 1 Apr 2026 10:31:11 +0100 Subject: [PATCH 151/554] crypto: qat - fix IRQ cleanup on 6xxx probe failure commit 95aed2af87ec43fa7624cc81dd13d37824ad4972 upstream. When adf_dev_up() partially completes and then fails, the IRQ handlers registered during adf_isr_resource_alloc() are not detached before the MSI-X vectors are released. Since the device is enabled with pcim_enable_device(), calling pci_alloc_irq_vectors() internally registers pcim_msi_release() as a devres action. On probe failure, devres runs pcim_msi_release() which calls pci_free_irq_vectors(), tearing down the MSI-X vectors while IRQ handlers (for example 'qat0-bundle0') are still attached. 
This causes remove_proc_entry() warnings: [ 22.163964] remove_proc_entry: removing non-empty directory 'irq/143', leaking at least 'qat0-bundle0' Moving the devm_add_action_or_reset() before adf_dev_up() does not solve the problem since devres runs in LIFO order and pcim_msi_release(), registered later inside adf_dev_up(), would still fire before adf_device_down(). Fix by calling adf_dev_down() explicitly when adf_dev_up() fails, to properly free IRQ handlers before devres releases the MSI-X vectors. Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver") Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Ahsan Atta Reviewed-by: Laurent M Coquerel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/intel/qat/qat_6xxx/adf_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c index c1dc9c56fdf54..f0d112e4b56c3 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c @@ -182,8 +182,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return ret; ret = adf_dev_up(accel_dev, true); - if (ret) + if (ret) { + adf_dev_down(accel_dev); return ret; + } ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev); if (ret) From fe570daa51930b066a4ecc2086df308e88a95d0e Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Wed, 25 Mar 2026 13:43:12 +0100 Subject: [PATCH 152/554] xfs: start gc on zonegc_low_space attribute updates commit 181ea4e2de422aa0a66f355bd59bccccdd169826 upstream. Start gc if the agressiveness of zone garbage collection is changed by the user (if the file system is not read only). Without this change, the new setting will not be taken into account until the gc thread is woken up by e.g. a write. 
Cc: stable@vger.kernel.org # v6.15 Fixes: 845abeb1f06a8a ("xfs: add tunable threshold parameter for triggering zone GC") Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_sysfs.c | 7 ++++++- fs/xfs/xfs_zone_alloc.h | 4 ++++ fs/xfs/xfs_zone_gc.c | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 7a5c5ef2db928..dc9562c0cdc6e 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -14,6 +14,7 @@ #include "xfs_log_priv.h" #include "xfs_mount.h" #include "xfs_zones.h" +#include "xfs_zone_alloc.h" struct xfs_sysfs_attr { struct attribute attr; @@ -724,6 +725,7 @@ zonegc_low_space_store( const char *buf, size_t count) { + struct xfs_mount *mp = zoned_to_mp(kobj); int ret; unsigned int val; @@ -734,7 +736,10 @@ zonegc_low_space_store( if (val > 100) return -EINVAL; - zoned_to_mp(kobj)->m_zonegc_low_space = val; + if (mp->m_zonegc_low_space != val) { + mp->m_zonegc_low_space = val; + xfs_zone_gc_wakeup(mp); + } return count; } diff --git a/fs/xfs/xfs_zone_alloc.h b/fs/xfs/xfs_zone_alloc.h index 4db02816d0fda..8b2ef98c81eff 100644 --- a/fs/xfs/xfs_zone_alloc.h +++ b/fs/xfs/xfs_zone_alloc.h @@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp); void xfs_unmount_zones(struct xfs_mount *mp); void xfs_zone_gc_start(struct xfs_mount *mp); void xfs_zone_gc_stop(struct xfs_mount *mp); +void xfs_zone_gc_wakeup(struct xfs_mount *mp); #else static inline int xfs_mount_zones(struct xfs_mount *mp) { @@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(struct xfs_mount *mp) static inline void xfs_zone_gc_stop(struct xfs_mount *mp) { } +static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp) +{ +} #endif /* CONFIG_XFS_RT */ #endif /* _XFS_ZONE_ALLOC_H */ diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 4ade544455320..d18c43598d4b3 100644 --- a/fs/xfs/xfs_zone_gc.c 
+++ b/fs/xfs/xfs_zone_gc.c @@ -1147,6 +1147,23 @@ xfs_zone_gc_stop( kthread_park(mp->m_zone_info->zi_gc_thread); } +void +xfs_zone_gc_wakeup( + struct xfs_mount *mp) +{ + struct super_block *sb = mp->m_super; + + /* + * If we are unmounting the file system we must not try to + * wake gc as m_zone_info might have been freed already. + */ + if (down_read_trylock(&sb->s_umount)) { + if (!xfs_is_readonly(mp)) + wake_up_process(mp->m_zone_info->zi_gc_thread); + up_read(&sb->s_umount); + } +} + int xfs_zone_gc_mount( struct xfs_mount *mp) From 5c293a1e1ef0f838772d20ae8afae4cbd87cd3f9 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Wed, 1 Apr 2026 12:02:41 +0800 Subject: [PATCH 153/554] xfs: fix a resource leak in xfs_alloc_buftarg() commit 29a7b2614357393b176ef06ba5bc3ff5afc8df69 upstream. In the error path, call fs_put_dax() to drop the DAX device reference. Fixes: 6f643c57d57c ("xfs: implement ->notify_failure() for XFS") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47edf3041631b..1ca95ef46a73d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1831,6 +1831,7 @@ xfs_alloc_buftarg( return btp; error_free: + fs_put_dax(btp->bt_daxdev, mp); kfree(btp); return ERR_PTR(error); } From 75d40ccf38ca7768e85eb8e369c0ad543c4e0964 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:11 +0100 Subject: [PATCH 154/554] firmware: google: framebuffer: Do not unregister platform device commit 5cd28bd28c8ce426b56ce4230dbd17537181d5ad upstream. The native driver takes over the framebuffer aperture by removing the system- framebuffer platform device. Afterwards the pointer in drvdata is dangling. Remove the entire logic around drvdata and let the kernel's aperture helpers handle this. 
The platform device depends on the native hardware device instead of the coreboot device anyway. When commit 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer driver") added the coreboot framebuffer code, the kernel did not support device-based aperture management. Instead native driviers only removed the conflicting fbdev device. At that point, unregistering the framebuffer device most likely worked correctly. It was definitely broken after commit d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures"). So take this commit for the Fixes tag. Earlier releases might work depending on the native hardware driver. Signed-off-by: Thomas Zimmermann Fixes: d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures") Acked-by: Tzung-Bi Shih Acked-by: Julius Werner Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Hans de Goede Cc: linux-fbdev@vger.kernel.org Cc: # v6.3+ Link: https://patch.msgid.link/20260217155836.96267-2-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/framebuffer-coreboot.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c index 0ad813536202d..f44183476ed73 100644 --- a/drivers/firmware/google/framebuffer-coreboot.c +++ b/drivers/firmware/google/framebuffer-coreboot.c @@ -81,19 +81,10 @@ static int framebuffer_probe(struct coreboot_device *dev) sizeof(pdata)); if (IS_ERR(pdev)) pr_warn("coreboot: could not register framebuffer\n"); - else - dev_set_drvdata(&dev->dev, pdev); return PTR_ERR_OR_ZERO(pdev); } -static void framebuffer_remove(struct coreboot_device *dev) -{ - struct platform_device *pdev = dev_get_drvdata(&dev->dev); - - platform_device_unregister(pdev); -} - static const struct coreboot_device_id framebuffer_ids[] = { { .tag = CB_TAG_FRAMEBUFFER }, { /* sentinel */ } @@ -102,7 +93,6 @@ MODULE_DEVICE_TABLE(coreboot, framebuffer_ids); static struct coreboot_driver 
framebuffer_driver = { .probe = framebuffer_probe, - .remove = framebuffer_remove, .drv = { .name = "framebuffer", }, From 4861d6cd923f9c218f3be6a0e4488b7ccdd0f83a Mon Sep 17 00:00:00 2001 From: Paul Louvel Date: Mon, 30 Mar 2026 12:28:18 +0200 Subject: [PATCH 155/554] crypto: talitos - fix SEC1 32k ahash request limitation commit 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 upstream. Since commit c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES"), the crypto core may pass large scatterlists spanning multiple pages to drivers supporting ahash operations. As a result, a driver can now receive large ahash requests. The SEC1 engine has a limitation where a single descriptor cannot process more than 32k of data. The current implementation attempts to handle the entire request within a single descriptor, which leads to failures raised by the driver: "length exceeds h/w max limit" Address this limitation by splitting large ahash requests into multiple descriptors, each respecting the 32k hardware limit. This allows processing arbitrarily large requests. Cc: stable@vger.kernel.org Fixes: c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES") Signed-off-by: Paul Louvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 216 ++++++++++++++++++++++++++------------- 1 file changed, 147 insertions(+), 69 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index e8c0db687c57f..4c325fa0eac19 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -12,6 +12,7 @@ * All rights reserved. 
*/ +#include #include #include #include @@ -870,10 +871,18 @@ struct talitos_ahash_req_ctx { unsigned int swinit; unsigned int first; unsigned int last; + unsigned int last_request; unsigned int to_hash_later; unsigned int nbuf; struct scatterlist bufsl[2]; struct scatterlist *psrc; + + struct scatterlist request_bufsl[2]; + struct ahash_request *areq; + struct scatterlist *request_sl; + unsigned int remaining_ahash_request_bytes; + unsigned int current_ahash_request_bytes; + struct work_struct sec1_ahash_process_remaining; }; struct talitos_export_state { @@ -1759,7 +1768,20 @@ static void ahash_done(struct device *dev, kfree(edesc); - ahash_request_complete(areq, err); + if (err) { + ahash_request_complete(areq, err); + return; + } + + req_ctx->remaining_ahash_request_bytes -= + req_ctx->current_ahash_request_bytes; + + if (!req_ctx->remaining_ahash_request_bytes) { + ahash_request_complete(areq, 0); + return; + } + + schedule_work(&req_ctx->sec1_ahash_process_remaining); } /* @@ -1925,60 +1947,7 @@ static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq, nbytes, 0, 0, 0, areq->base.flags, false); } -static int ahash_init(struct ahash_request *areq) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); - struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); - struct device *dev = ctx->dev; - struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); - unsigned int size; - dma_addr_t dma; - - /* Initialize the context */ - req_ctx->buf_idx = 0; - req_ctx->nbuf = 0; - req_ctx->first = 1; /* first indicates h/w must init its context */ - req_ctx->swinit = 0; /* assume h/w init of context */ - size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) - ? 
TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 - : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; - req_ctx->hw_context_size = size; - - dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, - DMA_TO_DEVICE); - dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE); - - return 0; -} - -/* - * on h/w without explicit sha224 support, we initialize h/w context - * manually with sha224 constants, and tell it to run sha256. - */ -static int ahash_init_sha224_swinit(struct ahash_request *areq) -{ - struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); - - req_ctx->hw_context[0] = SHA224_H0; - req_ctx->hw_context[1] = SHA224_H1; - req_ctx->hw_context[2] = SHA224_H2; - req_ctx->hw_context[3] = SHA224_H3; - req_ctx->hw_context[4] = SHA224_H4; - req_ctx->hw_context[5] = SHA224_H5; - req_ctx->hw_context[6] = SHA224_H6; - req_ctx->hw_context[7] = SHA224_H7; - - /* init 64-bit count */ - req_ctx->hw_context[8] = 0; - req_ctx->hw_context[9] = 0; - - ahash_init(areq); - req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ - - return 0; -} - -static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) +static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); @@ -1997,12 +1966,12 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) { /* Buffer up to one whole block */ - nents = sg_nents_for_len(areq->src, nbytes); + nents = sg_nents_for_len(req_ctx->request_sl, nbytes); if (nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); return nents; } - sg_copy_to_buffer(areq->src, nents, + sg_copy_to_buffer(req_ctx->request_sl, nents, ctx_buf + req_ctx->nbuf, nbytes); req_ctx->nbuf += nbytes; return 0; @@ -2029,7 +1998,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) 
sg_init_table(req_ctx->bufsl, nsg); sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf); if (nsg > 1) - sg_chain(req_ctx->bufsl, 2, areq->src); + sg_chain(req_ctx->bufsl, 2, req_ctx->request_sl); req_ctx->psrc = req_ctx->bufsl; } else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) { int offset; @@ -2038,26 +2007,26 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) offset = blocksize - req_ctx->nbuf; else offset = nbytes_to_hash - req_ctx->nbuf; - nents = sg_nents_for_len(areq->src, offset); + nents = sg_nents_for_len(req_ctx->request_sl, offset); if (nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); return nents; } - sg_copy_to_buffer(areq->src, nents, + sg_copy_to_buffer(req_ctx->request_sl, nents, ctx_buf + req_ctx->nbuf, offset); req_ctx->nbuf += offset; - req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src, + req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, req_ctx->request_sl, offset); } else - req_ctx->psrc = areq->src; + req_ctx->psrc = req_ctx->request_sl; if (to_hash_later) { - nents = sg_nents_for_len(areq->src, nbytes); + nents = sg_nents_for_len(req_ctx->request_sl, nbytes); if (nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); return nents; } - sg_pcopy_to_buffer(areq->src, nents, + sg_pcopy_to_buffer(req_ctx->request_sl, nents, req_ctx->buf[(req_ctx->buf_idx + 1) & 1], to_hash_later, nbytes - to_hash_later); @@ -2065,7 +2034,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) req_ctx->to_hash_later = to_hash_later; /* Allocate extended descriptor */ - edesc = ahash_edesc_alloc(areq, nbytes_to_hash); + edesc = ahash_edesc_alloc(req_ctx->areq, nbytes_to_hash); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -2087,14 +2056,123 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (ctx->keylen && (req_ctx->first || req_ctx->last)) edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; - return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, 
ahash_done); + return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done); } -static int ahash_update(struct ahash_request *areq) +static void sec1_ahash_process_remaining(struct work_struct *work) +{ + struct talitos_ahash_req_ctx *req_ctx = + container_of(work, struct talitos_ahash_req_ctx, + sec1_ahash_process_remaining); + int err = 0; + + req_ctx->request_sl = scatterwalk_ffwd(req_ctx->request_bufsl, + req_ctx->request_sl, TALITOS1_MAX_DATA_LEN); + + if (req_ctx->remaining_ahash_request_bytes > TALITOS1_MAX_DATA_LEN) + req_ctx->current_ahash_request_bytes = TALITOS1_MAX_DATA_LEN; + else { + req_ctx->current_ahash_request_bytes = + req_ctx->remaining_ahash_request_bytes; + + if (req_ctx->last_request) + req_ctx->last = 1; + } + + err = ahash_process_req_one(req_ctx->areq, + req_ctx->current_ahash_request_bytes); + + if (err != -EINPROGRESS) + ahash_request_complete(req_ctx->areq, err); +} + +static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct device *dev = ctx->dev; + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + req_ctx->areq = areq; + req_ctx->request_sl = areq->src; + req_ctx->remaining_ahash_request_bytes = nbytes; + + if (is_sec1) { + if (nbytes > TALITOS1_MAX_DATA_LEN) + nbytes = TALITOS1_MAX_DATA_LEN; + else if (req_ctx->last_request) + req_ctx->last = 1; + } + + req_ctx->current_ahash_request_bytes = nbytes; + + return ahash_process_req_one(req_ctx->areq, + req_ctx->current_ahash_request_bytes); +} + +static int ahash_init(struct ahash_request *areq) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct device *dev = ctx->dev; struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + unsigned int size; + 
dma_addr_t dma; + /* Initialize the context */ + req_ctx->buf_idx = 0; + req_ctx->nbuf = 0; + req_ctx->first = 1; /* first indicates h/w must init its context */ + req_ctx->swinit = 0; /* assume h/w init of context */ + size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) + ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 + : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; + req_ctx->hw_context_size = size; + req_ctx->last_request = 0; req_ctx->last = 0; + INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining); + + dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, + DMA_TO_DEVICE); + dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE); + + return 0; +} + +/* + * on h/w without explicit sha224 support, we initialize h/w context + * manually with sha224 constants, and tell it to run sha256. + */ +static int ahash_init_sha224_swinit(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->hw_context[0] = SHA224_H0; + req_ctx->hw_context[1] = SHA224_H1; + req_ctx->hw_context[2] = SHA224_H2; + req_ctx->hw_context[3] = SHA224_H3; + req_ctx->hw_context[4] = SHA224_H4; + req_ctx->hw_context[5] = SHA224_H5; + req_ctx->hw_context[6] = SHA224_H6; + req_ctx->hw_context[7] = SHA224_H7; + + /* init 64-bit count */ + req_ctx->hw_context[8] = 0; + req_ctx->hw_context[9] = 0; + + ahash_init(areq); + req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ + + return 0; +} + +static int ahash_update(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last_request = 0; return ahash_process_req(areq, areq->nbytes); } @@ -2103,7 +2181,7 @@ static int ahash_final(struct ahash_request *areq) { struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); - req_ctx->last = 1; + req_ctx->last_request = 1; return ahash_process_req(areq, 0); } @@ -2112,7 +2190,7 @@ static int ahash_finup(struct 
ahash_request *areq) { struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); - req_ctx->last = 1; + req_ctx->last_request = 1; return ahash_process_req(areq, areq->nbytes); } From dedaa897b9d785aeb311344d4459cdaf0382e884 Mon Sep 17 00:00:00 2001 From: Paul Louvel Date: Mon, 30 Mar 2026 12:28:19 +0200 Subject: [PATCH 156/554] crypto: talitos - rename first/last to first_desc/last_desc commit a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae upstream. Previous commit introduces a new last_request variable in the context structure. Renaming the first/last existing member variable in the context structure to improve readability. Cc: stable@vger.kernel.org Signed-off-by: Paul Louvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4c325fa0eac19..bc61d0fe35140 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -869,8 +869,8 @@ struct talitos_ahash_req_ctx { u8 buf[2][HASH_MAX_BLOCK_SIZE]; int buf_idx; unsigned int swinit; - unsigned int first; - unsigned int last; + unsigned int first_desc; + unsigned int last_desc; unsigned int last_request; unsigned int to_hash_later; unsigned int nbuf; @@ -889,8 +889,8 @@ struct talitos_export_state { u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)]; u8 buf[HASH_MAX_BLOCK_SIZE]; unsigned int swinit; - unsigned int first; - unsigned int last; + unsigned int first_desc; + unsigned int last_desc; unsigned int to_hash_later; unsigned int nbuf; }; @@ -1722,7 +1722,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, if (desc->next_desc && desc->ptr[5].ptr != desc2->ptr[5].ptr) unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE); - if (req_ctx->last) + if (req_ctx->last_desc) memcpy(areq->result, req_ctx->hw_context, crypto_ahash_digestsize(tfm)); @@ -1759,7 +1759,7 @@ static void 
ahash_done(struct device *dev, container_of(desc, struct talitos_edesc, desc); struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); - if (!req_ctx->last && req_ctx->to_hash_later) { + if (!req_ctx->last_desc && req_ctx->to_hash_later) { /* Position any partial block for next update/final/finup */ req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1; req_ctx->nbuf = req_ctx->to_hash_later; @@ -1825,7 +1825,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* first DWORD empty */ /* hash context in */ - if (!req_ctx->first || req_ctx->swinit) { + if (!req_ctx->first_desc || req_ctx->swinit) { map_single_talitos_ptr_nosync(dev, &desc->ptr[1], req_ctx->hw_context_size, req_ctx->hw_context, @@ -1833,7 +1833,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, req_ctx->swinit = 0; } /* Indicate next op is not the first. */ - req_ctx->first = 0; + req_ctx->first_desc = 0; /* HMAC key */ if (ctx->keylen) @@ -1866,7 +1866,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* fifth DWORD empty */ /* hash/HMAC out -or- hash context out */ - if (req_ctx->last) + if (req_ctx->last_desc) map_single_talitos_ptr(dev, &desc->ptr[5], crypto_ahash_digestsize(tfm), req_ctx->hw_context, DMA_FROM_DEVICE); @@ -1908,7 +1908,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (sg_count > 1) sync_needed = true; copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1); - if (req_ctx->last) + if (req_ctx->last_desc) map_single_talitos_ptr_nosync(dev, &desc->ptr[5], req_ctx->hw_context_size, req_ctx->hw_context, @@ -1964,7 +1964,7 @@ static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes bool is_sec1 = has_ftr_sec1(priv); u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx]; - if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) { + if (!req_ctx->last_desc && (nbytes + req_ctx->nbuf <= blocksize)) { /* Buffer up to one whole block */ nents = sg_nents_for_len(req_ctx->request_sl, nbytes); if (nents < 0) 
{ @@ -1981,7 +1981,7 @@ static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes nbytes_to_hash = nbytes + req_ctx->nbuf; to_hash_later = nbytes_to_hash & (blocksize - 1); - if (req_ctx->last) + if (req_ctx->last_desc) to_hash_later = 0; else if (to_hash_later) /* There is a partial block. Hash the full block(s) now */ @@ -2041,19 +2041,19 @@ static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes edesc->desc.hdr = ctx->desc_hdr_template; /* On last one, request SEC to pad; otherwise continue */ - if (req_ctx->last) + if (req_ctx->last_desc) edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD; else edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT; /* request SEC to INIT hash. */ - if (req_ctx->first && !req_ctx->swinit) + if (req_ctx->first_desc && !req_ctx->swinit) edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT; /* When the tfm context has a keylen, it's an HMAC. * A first or last (ie. not middle) descriptor must request HMAC. */ - if (ctx->keylen && (req_ctx->first || req_ctx->last)) + if (ctx->keylen && (req_ctx->first_desc || req_ctx->last_desc)) edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done); @@ -2076,7 +2076,7 @@ static void sec1_ahash_process_remaining(struct work_struct *work) req_ctx->remaining_ahash_request_bytes; if (req_ctx->last_request) - req_ctx->last = 1; + req_ctx->last_desc = 1; } err = ahash_process_req_one(req_ctx->areq, @@ -2103,7 +2103,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (nbytes > TALITOS1_MAX_DATA_LEN) nbytes = TALITOS1_MAX_DATA_LEN; else if (req_ctx->last_request) - req_ctx->last = 1; + req_ctx->last_desc = 1; } req_ctx->current_ahash_request_bytes = nbytes; @@ -2124,14 +2124,14 @@ static int ahash_init(struct ahash_request *areq) /* Initialize the context */ req_ctx->buf_idx = 0; req_ctx->nbuf = 0; - req_ctx->first = 1; /* first indicates h/w must init its context */ + 
req_ctx->first_desc = 1; /* first_desc indicates h/w must init its context */ req_ctx->swinit = 0; /* assume h/w init of context */ size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; req_ctx->hw_context_size = size; req_ctx->last_request = 0; - req_ctx->last = 0; + req_ctx->last_desc = 0; INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining); dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, @@ -2224,8 +2224,8 @@ static int ahash_export(struct ahash_request *areq, void *out) req_ctx->hw_context_size); memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf); export->swinit = req_ctx->swinit; - export->first = req_ctx->first; - export->last = req_ctx->last; + export->first_desc = req_ctx->first_desc; + export->last_desc = req_ctx->last_desc; export->to_hash_later = req_ctx->to_hash_later; export->nbuf = req_ctx->nbuf; @@ -2250,8 +2250,8 @@ static int ahash_import(struct ahash_request *areq, const void *in) memcpy(req_ctx->hw_context, export->hw_context, size); memcpy(req_ctx->buf[0], export->buf, export->nbuf); req_ctx->swinit = export->swinit; - req_ctx->first = export->first; - req_ctx->last = export->last; + req_ctx->first_desc = export->first_desc; + req_ctx->last_desc = export->last_desc; req_ctx->to_hash_later = export->to_hash_later; req_ctx->nbuf = export->nbuf; From 82aa32a4d69707fa71a4b0ce1fd246f412fb8e26 Mon Sep 17 00:00:00 2001 From: "Viorel Suman (OSS)" Date: Wed, 11 Mar 2026 14:33:09 +0200 Subject: [PATCH 157/554] pwm: imx-tpm: Count the number of enabled channels in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3962c24f2d14e8a7f8a23f56b7ce320523947342 upstream. 
On a soft reset TPM PWM IP may preserve its internal state from previous runtime, therefore on a subsequent OS boot and driver probe "enable_count" value and TPM PWM IP internal channels "enabled" states may get unaligned. In consequence on a suspend/resume cycle the call "if (--tpm->enable_count == 0)" may lead to "enable_count" overflow the system being blocked from entering suspend due to: if (tpm->enable_count > 0) return -EBUSY; Fix the problem by counting the enabled channels in probe function. Signed-off-by: Viorel Suman (OSS) Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support") Link: https://patch.msgid.link/20260311123309.348904-1-viorel.suman@oss.nxp.com Cc: stable@vger.kernel.org Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-imx-tpm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 5b399de16d604..80fdb3303400f 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -352,7 +352,7 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) struct clk *clk; void __iomem *base; int ret; - unsigned int npwm; + unsigned int i, npwm; u32 val; base = devm_platform_ioremap_resource(pdev, 0); @@ -382,6 +382,13 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) mutex_init(&tpm->lock); + /* count the enabled channels */ + for (i = 0; i < npwm; ++i) { + val = readl(base + PWM_IMX_TPM_CnSC(i)); + if (FIELD_GET(PWM_IMX_TPM_CnSC_ELS, val)) + ++tpm->enable_count; + } + ret = devm_pwmchip_add(&pdev->dev, chip); if (ret) return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); From f8775d9d9062da662cc861f9ff7722a65896d4cd Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Wed, 15 Apr 2026 03:00:03 +0300 Subject: [PATCH 158/554] tpm2-sessions: Fix missing tpm_buf_destroy() in tpm2_read_public() commit f0f75a3d98b7959a8677b6363e23190f3018636b upstream. 
tpm2_read_public() calls tpm_buf_init() but fails to call tpm_buf_destroy() on two exit paths, leaking a page allocation: 1. When name_size() returns an error (unrecognized hash algorithm), the function returns directly without destroying the buffer. 2. On the success path, the buffer is never destroyed before returning. All other error paths in the function correctly call tpm_buf_destroy() before returning. Fix both by adding the missing tpm_buf_destroy() calls. Cc: stable@vger.kernel.org # v6.19+ Fixes: bda1cbf73c6e ("tpm2-sessions: Fix tpm2_read_public range checks") Signed-off-by: Gunnar Kudrjavets Reviewed-by: Justinien Bouron Reviewed-by: Paul Menzel Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm2-sessions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 3f389e2f6f580..6e53a25f61d49 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -203,8 +203,10 @@ static int tpm2_read_public(struct tpm_chip *chip, u32 handle, void *name) rc = tpm_buf_read_u16(&buf, &offset); name_size_alg = name_size(&buf.data[offset]); - if (name_size_alg < 0) + if (name_size_alg < 0) { + tpm_buf_destroy(&buf); return name_size_alg; + } if (rc != name_size_alg) { tpm_buf_destroy(&buf); @@ -217,6 +219,7 @@ static int tpm2_read_public(struct tpm_chip *chip, u32 handle, void *name) } memcpy(name, &buf.data[offset], rc); + tpm_buf_destroy(&buf); return name_size_alg; } #endif /* CONFIG_TCG_TPM2_HMAC */ From 726ce504235956f4d0a0422ec843aee762c7a36a Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Wed, 8 Apr 2026 12:00:27 +0300 Subject: [PATCH 159/554] tpm: Fix auth session leak in tpm2_get_random() error path commit 666c1a2ca603d8314231200bf8bbb3a81bd64c6b upstream. 
When tpm_buf_fill_hmac_session() fails inside the do-while loop in tpm2_get_random(), the function returns directly after destroying the buffer, without ending the auth session via tpm2_end_auth_session(). This leaks the TPM auth session resource. All other error paths within the loop correctly reach the 'out' label which calls both tpm_buf_destroy() and tpm2_end_auth_session(). Fix this by replacing the early return with a goto to the existing 'out' label, which already handles both cleanup operations. The redundant tpm_buf_destroy() call is removed since 'out' takes care of it. Cc: stable@vger.kernel.org # v6.19+ Fixes: 6e9722e9a7bf ("tpm2-sessions: Fix out of range indexing in name_size") Signed-off-by: Gunnar Kudrjavets Reviewed-by: Justinien Bouron Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm2-cmd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 58a8477cda851..fcd46b78c78b5 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -275,10 +275,8 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max) NULL, 0); tpm_buf_append_u16(&buf, num_bytes); err = tpm_buf_fill_hmac_session(chip, &buf); - if (err) { - tpm_buf_destroy(&buf); - return err; - } + if (err) + goto out; err = tpm_transmit_cmd(chip, &buf, offsetof(struct tpm2_get_random_out, From 53e6d2d834df40960b655b353e7a8ff4d927e1c7 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Thu, 9 Apr 2026 17:20:54 +0000 Subject: [PATCH 160/554] tpm: Use kfree_sensitive() to free auth session in tpm_dev_release() commit c424d2664f08c77f08b4580b5f0cbaabf7c229b2 upstream. tpm_dev_release() uses plain kfree() to free chip->auth, which contains sensitive cryptographic material including HMAC session keys, nonces, and passphrase data (struct tpm2_auth). 
Every other code path that frees this structure uses kfree_sensitive() to zero the memory before releasing it: both tpm2_end_auth_session() and tpm_buf_check_hmac_response() do so. The tpm_dev_release() path is the only one that does not, leaving key material in freed slab memory until it is eventually overwritten. Use kfree_sensitive() for consistency with the rest of the driver and to ensure session keys are scrubbed during device teardown. Cc: stable@vger.kernel.org # v6.10+ Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions") Signed-off-by: Gunnar Kudrjavets Reviewed-by: Justinien Bouron Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index dfeb28866a327..192063a200430 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -283,7 +283,7 @@ static void tpm_dev_release(struct device *dev) kfree(chip->work_space.context_buf); kfree(chip->work_space.session_buf); #ifdef CONFIG_TCG_TPM2_HMAC - kfree(chip->auth); + kfree_sensitive(chip->auth); #endif kfree(chip); } From 6503775a5c4161dbdd2174bccdb9901dc09c2b07 Mon Sep 17 00:00:00 2001 From: Jacqueline Wong Date: Wed, 15 Apr 2026 16:00:05 +0000 Subject: [PATCH 161/554] tpm: tpm_tis: add error logging for data transfer commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream. 
Add logging to more easily determine reason for transmit failure Cc: stable@vger.kernel.org # v6.6+ Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors") Signed-off-by: Jacqueline Wong Signed-off-by: Jordan Hand Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 8954a8660ffc5..40f70a2d10da9 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -472,6 +472,8 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len) status = tpm_tis_status(chip); if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) { rc = -EIO; + dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n", + status); goto out_err; } } @@ -492,6 +494,8 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len) status = tpm_tis_status(chip); if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) { rc = -EIO; + dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n", + status); goto out_err; } From b5856e85a1f0b9fb6d03fcacfb58ce7a07f1d7ea Mon Sep 17 00:00:00 2001 From: Jacqueline Wong Date: Wed, 15 Apr 2026 16:00:06 +0000 Subject: [PATCH 162/554] tpm: tpm_tis: stop transmit if retries are exhausted commit 949692da7211572fac419b2986b6abc0cd1aeb76 upstream. tpm_tis_send_main() will attempt to retry sending data TPM_RETRY times. Currently, if those retries are exhausted, the driver will attempt to call execute. The TPM will be in the wrong state, leading to the operation simply timing out. Instead, if there is still an error after retries are exhausted, return that error immediately. 
Cc: stable@vger.kernel.org # v6.6+ Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors") Signed-off-by: Jacqueline Wong Signed-off-by: Jordan Hand Link: https://lore.kernel.org/r/20260415160006.2275325-3-jacqwong@google.com Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 40f70a2d10da9..8ea252beef7a4 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -557,11 +557,16 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len) break; else if (rc != -EAGAIN && rc != -EIO) /* Data transfer failed, not recoverable */ - return rc; + goto out_err; usleep_range(priv->timeout_min, priv->timeout_max); } + if (rc == -EAGAIN || rc == -EIO) { + dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n", TPM_RETRY); + goto out_err; + } + /* go and do it */ rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO); if (rc < 0) From 5184e2c065e3b667663300f104d1053d7cfb2eb0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 14:27:17 +0200 Subject: [PATCH 163/554] rtc: ntxec: fix OF node reference imbalance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream. The driver reuses the OF node of the parent multi-function device but fails to take another reference to balance the one dropped by the platform bus code when unbinding the MFD and deregistering the child devices. Fix this by using the intended helper for reusing OF nodes. 
Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller") Cc: stable@vger.kernel.org # 5.13 Cc: Jonathan Neuschäfer Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-ntxec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ntxec.c b/drivers/rtc/rtc-ntxec.c index 850ca49186fdc..d28ddb34e19e7 100644 --- a/drivers/rtc/rtc-ntxec.c +++ b/drivers/rtc/rtc-ntxec.c @@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platform_device *pdev) struct rtc_device *dev; struct ntxec_rtc *rtc; - pdev->dev.of_node = pdev->dev.parent->of_node; + device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent); rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) From 687ccdf582169cd680aeaf24cc953807c4cd4345 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 13 Apr 2026 21:26:46 +0200 Subject: [PATCH 164/554] mm/vmalloc: take vmap_purge_lock in shrinker commit ec05f51f1e65bce95528543eb73fda56fd201d94 upstream. decay_va_pool_node() can be invoked concurrently from two paths: __purge_vmap_area_lazy() when pools are being purged, and the shrinker via vmap_node_shrink_scan(). However, decay_va_pool_node() is not safe to run concurrently, and the shrinker path currently lacks serialization, leading to races and possible leaks. Protect decay_va_pool_node() by taking vmap_purge_lock in the shrinker path to ensure serialization with purge users. 
Link: https://lore.kernel.org/20260413192646.14683-1-urezki@gmail.com Fixes: 7679ba6b36db ("mm: vmalloc: add a shrinker to drain vmap pools") Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Baoquan He Cc: chenyichong Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e2f526ad7abba..c5368e171411d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -5255,6 +5255,7 @@ vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct vmap_node *vn; + guard(mutex)(&vmap_purge_lock); for_each_vmap_node(vn) decay_va_pool_node(vn, true); From c42a7efb9060d89b72708ffaf255d0002c2164a7 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Wed, 1 Apr 2026 08:57:02 +0800 Subject: [PATCH 165/554] mm/mempolicy: fix memory leaks in weighted_interleave_auto_store() commit 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 upstream. weighted_interleave_auto_store() fetches old_wi_state inside the if (!input) block only. This causes two memory leaks: 1. When a user writes "false" and the current mode is already manual, the function returns early without freeing the freshly allocated new_wi_state. 2. When a user writes "true", old_wi_state stays NULL because the fetch is skipped entirely. The old state is then overwritten by rcu_assign_pointer() but never freed, since the cleanup path is gated on old_wi_state being non-NULL. A user can trigger this repeatedly by writing "1" in a loop. Fix both leaks by moving the old_wi_state fetch before the input check, making it unconditional. This also allows a unified early return for both "true" and "false" when the requested mode matches the current mode. 
Link: https://lore.kernel.org/20260401005702.7096-1-liu.yun@linux.dev Link: https://sashiko.dev/#/patchset/20260331100740.84906-1-liu.yun@linux.dev Fixes: e341f9c3c841 ("mm/mempolicy: Weighted Interleave Auto-tuning") Signed-off-by: Jackie Liu Reviewed-by: Joshua Hahn Reviewed-by: Donet Tom Cc: Gregory Price Cc: Alistair Popple Cc: Byungchul Park Cc: David Hildenbrand Cc: # v6.16+ Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 94327574fbbbb..779a8cc17a832 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -3636,18 +3636,19 @@ static ssize_t weighted_interleave_auto_store(struct kobject *kobj, new_wi_state->iw_table[i] = 1; mutex_lock(&wi_state_lock); - if (!input) { - old_wi_state = rcu_dereference_protected(wi_state, - lockdep_is_held(&wi_state_lock)); - if (!old_wi_state) - goto update_wi_state; - if (input == old_wi_state->mode_auto) { - mutex_unlock(&wi_state_lock); - return count; - } + old_wi_state = rcu_dereference_protected(wi_state, + lockdep_is_held(&wi_state_lock)); - memcpy(new_wi_state->iw_table, old_wi_state->iw_table, - nr_node_ids * sizeof(u8)); + if (old_wi_state && input == old_wi_state->mode_auto) { + mutex_unlock(&wi_state_lock); + kfree(new_wi_state); + return count; + } + + if (!input) { + if (old_wi_state) + memcpy(new_wi_state->iw_table, old_wi_state->iw_table, + nr_node_ids * sizeof(u8)); goto update_wi_state; } From 8a62c58411cbd748d7aeab0e5b0963e33ff47a7a Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Tue, 31 Mar 2026 18:15:53 +0800 Subject: [PATCH 166/554] mm/damon/stat: fix memory leak on damon_start() failure in damon_stat_start() commit e04ed278d25bf15769800bf6e35c6737f137186f upstream. Destroy the DAMON context and reset the global pointer when damon_start() fails.
Otherwise, the context allocated by damon_stat_build_ctx() is leaked, and the stale damon_stat_context pointer will be overwritten on the next enable attempt, making the old allocation permanently unreachable. Link: https://lore.kernel.org/20260331101553.88422-1-liu.yun@linux.dev Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module") Signed-off-by: Jackie Liu Reviewed-by: SeongJae Park Cc: # 6.17.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/stat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/damon/stat.c b/mm/damon/stat.c index 3a55f6e41dc4c..eb728a89924e1 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -247,8 +247,11 @@ static int damon_stat_start(void) if (!damon_stat_context) return -ENOMEM; err = damon_start(&damon_stat_context, 1, true); - if (err) + if (err) { + damon_destroy_ctx(damon_stat_context); + damon_stat_context = NULL; return err; + } damon_stat_last_refresh_jiffies = jiffies; call_control.data = damon_stat_context; From b09958e235f2b9cd3898b85a8529172afa80d212 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 28 Mar 2026 21:38:59 -0700 Subject: [PATCH 167/554] mm/damon/core: validate damos_quota_goal->nid for node_mem_{used,free}_bp commit 40250b2dded0604a112be605f3828700d80ad7c2 upstream. Patch series "mm/damon/core: validate damos_quota_goal->nid". node_mem[cg]_{used,free}_bp DAMOS quota goals receive the node id. The node id is used for si_meminfo_node() and NODE_DATA() without proper validation. As a result, privileged users can trigger an out of bounds memory access using DAMON_SYSFS. Fix the issues. The issue was originally reported [1] with a fix by another author. The original author announced [2] that they will stop working including the fix that was still in the review stage. Hence I'm restarting this. This patch (of 2): Users can set damos_quota_goal->nid with arbitrary value for node_mem_{used,free}_bp. 
But DAMON core is using those for si_meminfo_node() without the validation of the value. This can result in out of bounds memory access. The issue can actually triggered using DAMON user-space tool (damo), like below. $ sudo ./damo start --damos_action stat \ --damos_quota_goal node_mem_used_bp 50% -1 \ --damos_quota_interval 1s $ sudo dmesg [...] [ 65.565986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 Fix this issue by adding the validation of the given node. If an invalid node id is given, it returns 0% for used memory ratio, and 100% for free memory ratio. Link: https://lore.kernel.org/20260329043902.46163-2-sj@kernel.org Link: https://lore.kernel.org/20260325073034.140353-1-objecting@objecting.org [1] Link: https://lore.kernel.org/20260327040924.68553-1-sj@kernel.org [2] Fixes: 0e1c773b501f ("mm/damon/core: introduce damos quota goal metrics for memory node utilization") Signed-off-by: SeongJae Park Cc: # 6.16.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 3fb199a47fa9e..3cda8682d76b9 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2038,12 +2038,24 @@ static inline u64 damos_get_some_mem_psi_total(void) #endif /* CONFIG_PSI */ #ifdef CONFIG_NUMA +static bool invalid_mem_node(int nid) +{ + return nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY); +} + static __kernel_ulong_t damos_get_node_mem_bp( struct damos_quota_goal *goal) { struct sysinfo i; __kernel_ulong_t numerator; + if (invalid_mem_node(goal->nid)) { + if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) + return 0; + else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */ + return 10000; + } + si_meminfo_node(&i, goal->nid); if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) numerator = i.totalram - i.freeram; From 0fedd09ef95209a53dcea1aeb10976ce2a0b35d1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 29 Mar 2026 
08:23:05 -0700 Subject: [PATCH 168/554] mm/damon/core: use time_in_range_open() for damos quota window start commit 049a57421dd67a28c45ae7e92c36df758033e5fa upstream. damos_adjust_quota() uses time_after_eq() to show if it is time to start a new quota charge window, comparing the current jiffies and the scheduled next charge window start time. If it is, the next charge window start time is updated and the new charge window starts. The time check and next window start time update is skipped while the scheme is deactivated by the watermarks. Let's suppose the deactivation is kept more than LONG_MAX jiffies (assuming CONFIG_HZ of 250, more than 99 days in 32 bit systems and more than one billion years in 64 bit systems), resulting in having the jiffies larger than the next charge window start time + LONG_MAX. Then, the time_after_eq() call can return false until another LONG_MAX jiffies are passed. This means the scheme can continue working after being reactivated by the watermarks. But, soon, the quota will be exceeded and the scheme will again effectively stop working until the next charge window starts. Because the current charge window is extended to up to LONG_MAX jiffies, however, it will look like it stopped unexpectedly and indefinitely, from the user's perspective. Fix this by using !time_in_range_open() instead. The issue was discovered [1] by sashiko. 
Link: https://lore.kernel.org/20260329152306.45796-1-sj@kernel.org Link: https://lore.kernel.org/20260324040722.57944-1-sj@kernel.org [1] Fixes: ee801b7dd782 ("mm/damon/schemes: activate schemes based on a watermarks mechanism") Signed-off-by: SeongJae Park Cc: # 5.16.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 3cda8682d76b9..7738095ea4ce3 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2179,7 +2179,8 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) } /* New charge window starts */ - if (time_after_eq(jiffies, quota->charged_from + + if (!time_in_range_open(jiffies, quota->charged_from, + quota->charged_from + msecs_to_jiffies(quota->reset_interval))) { if (quota->esz && quota->charged_sz >= quota->esz) s->stat.qt_exceeds++; From fe06ea2f7b7f8eeec0d11cc9e411f907011b01d1 Mon Sep 17 00:00:00 2001 From: "Denis M. Karpov" Date: Thu, 9 Apr 2026 13:33:45 +0300 Subject: [PATCH 169/554] userfaultfd: allow registration of ranges below mmap_min_addr commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream. The current implementation of validate_range() in fs/userfaultfd.c performs a hard check against mmap_min_addr. This is redundant because UFFDIO_REGISTER operates on memory ranges that must already be backed by a VMA. Enforcing mmap_min_addr or capability checks again in userfaultfd is unnecessary and prevents applications like binary compilers from using UFFD for valid memory regions mapped by application. Remove the redundant check for mmap_min_addr. We started using UFFD instead of the classic mprotect approach in the binary translator to track application writes. During development, we encountered this bug. The translator cannot control where the translated application chooses to map its memory and if the app requires a low-address area, UFFD fails, whereas mprotect would work just fine. 
I believe this is a genuine logic bug rather than an improvement, and I would appreciate including the fix in stable. Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") Signed-off-by: Denis M. Karpov Reviewed-by: Lorenzo Stoakes Acked-by: Harry Yoo (Oracle) Reviewed-by: Pedro Falcato Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Viro Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 54c6cc7fe9c62..df18fb4534037 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1219,8 +1219,6 @@ static __always_inline int validate_unaligned_range( return -EINVAL; if (!len) return -EINVAL; - if (start < mmap_min_addr) - return -EINVAL; if (start >= task_size) return -EINVAL; if (len > task_size - start) From 26f01d03775e4580b31dc9c79322d11af079f11f Mon Sep 17 00:00:00 2001 From: Tao Cui Date: Thu, 9 Apr 2026 18:56:36 +0800 Subject: [PATCH 170/554] LoongArch: KVM: Use CSR_CRMD_PLV in kvm_arch_vcpu_in_kernel() commit da773ea3f59032f659bfc4c450ca86e384786168 upstream. The function reads LOONGARCH_CSR_CRMD but uses CSR_PRMD_PPLV to extract the privilege level. While both masks have the same value (0x3), CSR_CRMD_PLV is the semantically correct constant for CRMD. 
Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Tao Cui Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 14bd36597ab33..a7510b403f526 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -375,7 +375,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) val = gcsr_read(LOONGARCH_CSR_CRMD); preempt_enable(); - return (val & CSR_PRMD_PPLV) == PLV_KERN; + return (val & CSR_CRMD_PLV) == PLV_KERN; } #ifdef CONFIG_GUEST_PERF_EVENTS From c098979293dae1dcc59a3a269765e4136e77b065 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 17 Feb 2026 16:54:38 -0800 Subject: [PATCH 171/554] KVM: x86: Defer non-architectural deliver of exception payload to userspace read commit d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 upstream. When attempting to play nice with userspace that hasn't enabled KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the payload until userspace actually reads relevant vCPU state, and more importantly, force delivery of the payload in *all* paths where userspace saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS. Ignoring userspace save/restore for the moment, delivering the payload before the exception is injected is wrong regardless of whether L1 or L2 is running. To make matters even more confusing, the flaw *currently* being papered over by the !is_guest_mode() check isn't even the same bug that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF delivery") was trying to avoid. At the time of commit da998b46d244, KVM didn't correctly handle exception intercepts, as KVM would wait until VM-Entry into L2 was imminent to check if the queued exception should morph to a nested VM-Exit. I.e. KVM would deliver the payload to L2 and then synthesize a VM-Exit into L1. 
But the payload was only the most blatant issue, e.g. waiting to check exception intercepts would also lead to KVM incorrectly escalating a should-be-intercepted #PF into a #DF. That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86: Morph pending exceptions to pending VM-Exits at queue time"), but in the interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on the !is_guest_mode() check. While not recorded in the changelog, the motivation for deferring the !exception_payload_enabled delivery was to fix a flaw where a synthesized MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6. On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the payload from the queue/pending exception. I.e. prematurely delivering the payload would cause the pending #DB to not be recorded in the VMCS, and of course, clobber L2's DR6 as seen by L1. Jumping back to save+restore, the quirked behavior of forcing delivery of the payload only works if userspace does KVM_GET_VCPU_EVENTS *before* CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS. E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the CR2 saved by userspace won't contain the payload for the exception save by KVM_GET_VCPU_EVENTS. Deliberately deliver the payload in the store_regs() path, as it's the least awful option even though userspace may not be doing save+restore. Because if userspace _is_ doing save restore, it could elide KVM_GET_SREGS knowing that SREGS were already saved when the vCPU exited. 
Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com Cc: Yosry Ahmed Cc: stable@vger.kernel.org Reviewed-by: Yosry Ahmed Tested-by: Yosry Ahmed Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 62 +++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c71869e545908..24ed813952880 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -861,9 +861,6 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr, vcpu->arch.exception.error_code = error_code; vcpu->arch.exception.has_payload = has_payload; vcpu->arch.exception.payload = payload; - if (!is_guest_mode(vcpu)) - kvm_deliver_exception_payload(vcpu, - &vcpu->arch.exception); return; } @@ -5555,18 +5552,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) +static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu) { - struct kvm_queued_exception *ex; - - process_nmi(vcpu); - -#ifdef CONFIG_KVM_SMM - if (kvm_check_request(KVM_REQ_SMI, vcpu)) - process_smi(vcpu); -#endif - /* * KVM's ABI only allows for one exception to be migrated. 
Luckily, * the only time there can be two queued exceptions is if there's a @@ -5577,21 +5564,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, if (vcpu->arch.exception_vmexit.pending && !vcpu->arch.exception.pending && !vcpu->arch.exception.injected) - ex = &vcpu->arch.exception_vmexit; - else - ex = &vcpu->arch.exception; + return &vcpu->arch.exception_vmexit; + + return &vcpu->arch.exception; +} + +static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu) +{ + struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu); /* - * In guest mode, payload delivery should be deferred if the exception - * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1 - * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability, - * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not - * propagate the payload and so it cannot be safely deferred. Deliver - * the payload if the capability hasn't been requested. + * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver + * the pending exception payload when userspace saves *any* vCPU state + * that interacts with exception payloads to avoid breaking userspace. + * + * Architecturally, KVM must not deliver an exception payload until the + * exception is actually injected, e.g. to avoid losing pending #DB + * information (which VMX tracks in the VMCS), and to avoid clobbering + * state if the exception is never injected for whatever reason. But + * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or + * may not propagate the payload across save+restore, and so KVM can't + * safely defer delivery of the payload. 
*/ if (!vcpu->kvm->arch.exception_payload_enabled && ex->pending && ex->has_payload) kvm_deliver_exception_payload(vcpu, ex); +} + +static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu); + + process_nmi(vcpu); + +#ifdef CONFIG_KVM_SMM + if (kvm_check_request(KVM_REQ_SMI, vcpu)) + process_smi(vcpu); +#endif + + kvm_handle_exception_payload_quirk(vcpu); memset(events, 0, sizeof(*events)); @@ -5770,6 +5782,8 @@ static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, vcpu->arch.guest_state_protected) return -EINVAL; + kvm_handle_exception_payload_quirk(vcpu); + memset(dbgregs, 0, sizeof(*dbgregs)); BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db)); @@ -12125,6 +12139,8 @@ static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (vcpu->arch.guest_state_protected) goto skip_protected_regs; + kvm_handle_exception_payload_quirk(vcpu); + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); From e07ad47e53e9bfd132f3e3ac20c53970faa10f25 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 10 Feb 2026 01:08:06 +0000 Subject: [PATCH 172/554] KVM: nSVM: Mark all of vmcb02 dirty when restoring nested state commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream. When restoring a vCPU in guest mode, any state restored before KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding dirty bits in vmcb01, as it is the active VMCB before switching to vmcb02 in svm_set_nested_state(). Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to capture any previously restored fields. 
Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index db35033999a8e..d10447ced9115 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1874,6 +1874,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); + /* + * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields + * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here. + */ + vmcb_mark_all_dirty(svm->vmcb); + /* * While the nested guest CR3 is already checked and set by * KVM_SET_SREGS, it was set when nested state was yet loaded, From 076ab13fe3213f51820f0732611873aec5679b78 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 25 Feb 2026 00:59:43 +0000 Subject: [PATCH 173/554] KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2 commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream. After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs fields written by the CPU from vmcb02 to the cached vmcb12. This is because the cached vmcb12 is used as the authoritative copy of some of the controls, and is the payload when saving/restoring nested state. NextRIP is also written by the CPU (in some cases) after VMRUN, but is not sync'd to the cached vmcb12. As a result, it is corrupted after save/restore (replaced by the original value written by L1 on nested VMRUN). This could cause problems for both KVM (e.g. when injecting a soft IRQ) or L1 (e.g. when using NextRIP to advance RIP after emulating an instruction). 
Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only after completing interrupts (not in nested_sync_control_from_vmcb02()), as KVM may update NextRIP (e.g. when re-injecting a soft IRQ). Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Co-developed-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 939b944185548..a083f738b2367 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4343,6 +4343,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) svm_complete_interrupts(vcpu); + /* + * Update the cache after completing interrupts to get an accurate + * NextRIP, e.g. when re-injecting a soft interrupt. + * + * FIXME: Rework svm_get_nested_state() to not pull data from the + * cache (except for maybe int_ctl). + */ + if (is_guest_mode(vcpu)) + svm->nested.ctl.next_rip = svm->vmcb->control.next_rip; + return svm_exit_handlers_fastpath(vcpu); } From 497f6af9679fc9c6ce2f438e11ed5d51b1aa8297 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 25 Feb 2026 00:59:44 +0000 Subject: [PATCH 174/554] KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2 commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream. After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs fields written by the CPU from vmcb02 to the cached vmcb12. This is because the cached vmcb12 is used as the authoritative copy of some of the controls, and is the payload when saving/restoring nested state. int_state is also written by the CPU, specifically bit 0 (i.e. SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to cached vmcb12. 
This does not cause a problem if KVM_SET_NESTED_STATE precedes KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites what KVM_SET_NESTED_STATE restored in int_state). However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an interrupt shadow would be restored into vmcb01 instead of vmcb02. This would mostly be benign for L1 (delays an interrupt), but not for L2. For L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before a HLT that should have been in an interrupt shadow). Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02() to avoid this problem. With that, KVM_SET_NESTED_STATE restores the correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it would overwrite it with the same value. Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index d10447ced9115..66effdba2d607 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -487,6 +487,7 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm) u32 mask; svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + svm->nested.ctl.int_state = svm->vmcb->control.int_state; /* Only a few fields of int_ctl are written by the processor. */ mask = V_IRQ_MASK | V_TPR_MASK; From c15392ed9e49c1a16b4d3a3ccf1b3bf2318a6c28 Mon Sep 17 00:00:00 2001 From: Kevin Cheng Date: Sat, 28 Feb 2026 03:33:26 +0000 Subject: [PATCH 175/554] KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0 commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream. 
INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to properly inject #UD when EFER.SVME=0. Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA") Cc: stable@vger.kernel.org Signed-off-by: Kevin Cheng Reviewed-by: Yosry Ahmed Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com [sean: tag for stable@] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a083f738b2367..39081a9e98d89 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2289,6 +2289,9 @@ static int invlpga_interception(struct kvm_vcpu *vcpu) gva_t gva = kvm_rax_read(vcpu); u32 asid = kvm_rcx_read(vcpu); + if (nested_svm_check_permissions(vcpu)) + return 1; + /* FIXME: Handle an address size prefix. */ if (!is_long_mode(vcpu)) gva = (u32)gva; From 8302e9be9f9cb9fcd9f3f2ed2397522c1a1e288d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Feb 2026 15:09:51 -0800 Subject: [PATCH 176/554] KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream. When reacting to an intercept update, explicitly mark vmcb01's intercepts dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit() isn't guaranteed to mark VMCB_INTERCEPTS as dirty. I.e. if L2 is active, KVM will modify the intercepts for L1, but might not mark them as dirty before the next VMRUN of L1. 
Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count") Cc: stable@vger.kernel.org Reviewed-by: Yosry Ahmed Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 66effdba2d607..3cf31c3619da7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -129,11 +129,13 @@ void recalc_intercepts(struct vcpu_svm *svm) struct vmcb_ctrl_area_cached *g; unsigned int i; - vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS); if (!is_guest_mode(&svm->vcpu)) return; + vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + c = &svm->vmcb->control; h = &svm->vmcb01.ptr->control; g = &svm->nested.ctl; From 46b3827abc4473e1f907f4609f9e0190c2fd6d52 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 24 Feb 2026 22:50:17 +0000 Subject: [PATCH 177/554] KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream. On nested VMRUN, KVM ensures AVIC is inhibited by requesting KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding APICV_INHIBIT_REASON_NESTED, and disabling AVIC. However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC is not inhibited. Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is active, similar to the nested VMRUN path. 
Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3cf31c3619da7..b9d45b64b97e8 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1896,6 +1896,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.force_msr_bitmap_recalc = true; + if (kvm_vcpu_apicv_active(vcpu)) + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; out_free: From 3428ed1529a1af4cce5aff6c5bd2fcc39ad726bb Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 25 Feb 2026 00:59:47 +0000 Subject: [PATCH 178/554] KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN commit 8d397582f6b5e9fbcf09781c7c934b4910e94a50 upstream. For guests with NRIPS disabled, L1 does not provide NextRIP when running an L2 with an injected soft interrupt, instead it advances the current RIP before running it. KVM uses the current RIP as the NextRIP in vmcb02 to emulate a CPU without NRIPS. However, after L2 runs the first time, NextRIP will be updated by the CPU and/or KVM, and the current RIP is no longer the correct value to use in vmcb02. Hence, after save/restore, use the current RIP if and only if a nested run is pending, otherwise use NextRIP. Give soft_int_next_rip the same treatment, as it's the same logic, just for a narrower use case. 
Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260225005950.3739782-6-yosry@kernel.org [sean: give soft_int_next_rip the same treatment] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b9d45b64b97e8..540404aacb128 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -814,24 +814,32 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err; /* - * next_rip is consumed on VMRUN as the return address pushed on the + * NextRIP is consumed on VMRUN as the return address pushed on the * stack for injected soft exceptions/interrupts. If nrips is exposed - * to L1, take it verbatim from vmcb12. If nrips is supported in - * hardware but not exposed to L1, stuff the actual L2 RIP to emulate - * what a nrips=0 CPU would do (L1 is responsible for advancing RIP - * prior to injecting the event). + * to L1, take it verbatim from vmcb12. + * + * If nrips is supported in hardware but not exposed to L1, stuff the + * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is + * responsible for advancing RIP prior to injecting the event). This is + * only the case for the first L2 run after VMRUN. After that (e.g. + * during save/restore), NextRIP is updated by the CPU and/or KVM, and + * the value of the L2 RIP from vmcb12 should not be used. 
*/ - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) - vmcb02->control.next_rip = svm->nested.ctl.next_rip; - else if (boot_cpu_has(X86_FEATURE_NRIPS)) - vmcb02->control.next_rip = vmcb12_rip; + if (boot_cpu_has(X86_FEATURE_NRIPS)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + vmcb02->control.next_rip = svm->nested.ctl.next_rip; + else + vmcb02->control.next_rip = vmcb12_rip; + } svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { svm->soft_int_injected = true; svm->soft_int_csbase = vmcb12_csbase; svm->soft_int_old_rip = vmcb12_rip; - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) svm->soft_int_next_rip = svm->nested.ctl.next_rip; else svm->soft_int_next_rip = vmcb12_rip; From 3a95eb7c695090defaf2fd88f2664a9382381547 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 25 Feb 2026 00:59:48 +0000 Subject: [PATCH 179/554] KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run commit a0592461f39c00b28f552fe842a063a00043eaa8 upstream. For guests with NRIPS disabled, L1 does not provide NextRIP when running an L2 with an injected soft interrupt, instead it advances L2's RIP before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to emulate a CPU without NRIPS. However, in svm_set_nested_state(), the value used for L2's current RIP comes from vmcb02, which is just whatever the vCPU had in vmcb02 before restoring nested state (zero on a freshly created vCPU). Passing the cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue if registers are restored before nested state. Instead, split the logic of setting NextRIP in vmcb02. Handle the 'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12 (or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control(). 
Delay the special case of stuffing L2's current RIP into vmcb02's NextRIP until shortly before the vCPU is run, to make sure the most up-to-date value of RIP is used regardless of KVM_SET_REGS and KVM_SET_NESTED_STATE's relative ordering. Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260225005950.3739782-7-yosry@kernel.org [sean: use new helper, svm_fixup_nested_rips()] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 25 ++++++++----------------- arch/x86/kvm/svm/svm.c | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 540404aacb128..8ae0e3d8da87d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -814,24 +814,15 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err; /* - * NextRIP is consumed on VMRUN as the return address pushed on the - * stack for injected soft exceptions/interrupts. If nrips is exposed - * to L1, take it verbatim from vmcb12. - * - * If nrips is supported in hardware but not exposed to L1, stuff the - * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is - * responsible for advancing RIP prior to injecting the event). This is - * only the case for the first L2 run after VMRUN. After that (e.g. - * during save/restore), NextRIP is updated by the CPU and/or KVM, and - * the value of the L2 RIP from vmcb12 should not be used. + * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1 + * advances L2's RIP before VMRUN instead of using NextRIP. KVM will + * stuff the current RIP as vmcb02's NextRIP before L2 is run. After + * the first run of L2 (e.g. 
after save+restore), NextRIP is updated by + * the CPU and/or KVM and should be used regardless of L1's support. */ - if (boot_cpu_has(X86_FEATURE_NRIPS)) { - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || - !svm->nested.nested_run_pending) - vmcb02->control.next_rip = svm->nested.ctl.next_rip; - else - vmcb02->control.next_rip = vmcb12_rip; - } + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + vmcb02->control.next_rip = svm->nested.ctl.next_rip; svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 39081a9e98d89..a8e8c76c023b8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3661,6 +3661,29 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; } +static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending) + return; + + /* + * If nrips is supported in hardware but not exposed to L1, stuff the + * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is + * responsible for advancing RIP prior to injecting the event). Once L2 + * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or + * KVM, and this is no longer needed. + * + * This is done here (as opposed to when preparing vmcb02) to use the + * most up-to-date value of RIP regardless of the order of restoring + * registers and nested state in the vCPU save+restore path. 
+ */ + if (boot_cpu_has(X86_FEATURE_NRIPS) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->vmcb->control.next_rip = kvm_rip_read(vcpu); +} + void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, int trig_mode, int vector) { @@ -4246,6 +4269,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) } svm->vmcb->save.cr2 = vcpu->arch.cr2; + svm_fixup_nested_rips(vcpu); + svm_hv_update_vp_id(svm->vmcb, vcpu); /* From 88572f9d83d7d25a5c333f2f2e7494d83d8cf97a Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Feb 2026 20:10:10 +0000 Subject: [PATCH 180/554] KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream. KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on nested #VMEXIT. This value is incorrect in some cases, causing L1 to run L2 with a corrupted CR2. This could lead to segfaults or data corruption if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use the correct value in vcpu->arch.cr2 instead. The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are only out-of-sync in two cases: after save+restore, and after a #PF is injected into L2. In either case, if a #VMEXIT to L1 is synthesized before L2 runs, using the value in vmcb02 would be incorrect. After save+restore, the value of CR2 is restored by KVM_SET_SREGS into vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before that, it holds whatever was in vmcb02 before restore, which would be zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12 is the right thing to do. The #PF injection case is more nuanced. Although the APM is a bit unclear about when CR2 is written during a #PF, the SDM is more clear: Processors update CR2 whenever a page fault is detected. 
If a second page fault occurs while an earlier page fault is being delivered, the faulting linear address of the second fault will overwrite the contents of CR2 (replacing the previous address). These updates to CR2 occur even if the page fault results in a double fault or occurs during the delivery of a double fault. KVM injecting the exception surely counts as the #PF being "detected". More importantly, when an exception is injected into L2 at the time of a synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly, such that an L1 hypervisor can re-inject the exception. If CR2 is not written at that point, the L1 hypervisor have no way of correctly re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after the #PF is injected into L2 but before it actually runs, using vcpu->arch.cr2 to update vmcb12 is also the right thing to do. Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for L2, only when it is injected. The distinction is important, because only injected (but not intercepted) exceptions are propagated to L1 through exit_int_info. It would be incorrect to update CR2 in vmcb12 for a pending #PF, as L1 would perceive an updated CR2 value with no #PF. 
Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8ae0e3d8da87d..6243e7d91c758 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1114,7 +1114,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->save.efer = svm->vcpu.arch.efer; vmcb12->save.cr0 = kvm_read_cr0(vcpu); vmcb12->save.cr3 = kvm_read_cr3(vcpu); - vmcb12->save.cr2 = vmcb02->save.cr2; + vmcb12->save.cr2 = vcpu->arch.cr2; vmcb12->save.cr4 = svm->vcpu.arch.cr4; vmcb12->save.rflags = kvm_get_rflags(vcpu); vmcb12->save.rip = kvm_rip_read(vcpu); From a3f0981a5a0e0bd51ad74cc7d9eed32294b24002 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:33:55 +0000 Subject: [PATCH 181/554] KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12 commit b53ab5167a81537777ac780bbd93d32613aa3bda upstream. svm_copy_lbrs() always marks VMCB_LBR dirty in the destination VMCB. However, nested_svm_vmexit() uses it to copy LBRs to vmcb12, and clearing clean bits in vmcb12 is not architecturally defined. Move vmcb_mark_dirty() to callers and drop it for vmcb12. This also facilitates incoming refactoring that does not pass the entire VMCB to svm_copy_lbrs(). 
Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-2-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 7 +++++-- arch/x86/kvm/svm/svm.c | 2 -- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 6243e7d91c758..947f3b961ce6f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -684,6 +684,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 } else { svm_copy_lbrs(vmcb02, vmcb01); } + vmcb_mark_dirty(vmcb02, VMCB_LBR); svm_update_lbrv(&svm->vcpu); } @@ -1188,10 +1189,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { svm_copy_lbrs(vmcb12, vmcb02); - else + } else { svm_copy_lbrs(vmcb01, vmcb02); + vmcb_mark_dirty(vmcb01, VMCB_LBR); + } svm_update_lbrv(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a8e8c76c023b8..055d3f64d06be 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -811,8 +811,6 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) to_vmcb->save.br_to = from_vmcb->save.br_to; to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from; to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to; - - vmcb_mark_dirty(to_vmcb, VMCB_LBR); } static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) From a5ea402844f175d075ac6a2936f29ad912fa1911 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Mar 2026 16:06:56 -0800 Subject: [PATCH 182/554] KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run commit c64bc6ed1764c1b7e3c0017019f743196074092f upstream. 
In the save+restore path, when restoring nested state, the values of RIP and CS base passed into nested_vmcb02_prepare_control() are mostly incorrect. They are both pulled from the vmcb02. For CS base, the value is only correct if system regs are restored before nested state. The value of RIP is whatever the vCPU had in vmcb02 before restoring nested state (zero on a freshly created vCPU). Instead, take a similar approach to NextRIP, and delay initializing the RIP tracking fields until shortly before the vCPU is run, to make sure the most up-to-date values of RIP and CS base are used regardless of KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative ordering. Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") CC: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260225005950.3739782-8-yosry@kernel.org [sean: deal with the svm_cancel_injection() madness] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 17 ++++++++--------- arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 947f3b961ce6f..f382609474c03 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -712,9 +712,7 @@ static bool is_evtinj_nmi(u32 evtinj) return type == SVM_EVTINJ_TYPE_NMI; } -static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, - unsigned long vmcb12_rip, - unsigned long vmcb12_csbase) +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK; u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; @@ -826,15 +824,16 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.next_rip = svm->nested.ctl.next_rip; svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); + + /* + * 
soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1 + * doesn't have NRIPS) are initialized later, before the vCPU is run. + */ if (is_evtinj_soft(vmcb02->control.event_inj)) { svm->soft_int_injected = true; - svm->soft_int_csbase = vmcb12_csbase; - svm->soft_int_old_rip = vmcb12_rip; if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || !svm->nested.nested_run_pending) svm->soft_int_next_rip = svm->nested.ctl.next_rip; - else - svm->soft_int_next_rip = vmcb12_rip; } /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ @@ -919,7 +918,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base); + nested_vmcb02_prepare_control(svm); nested_vmcb02_prepare_save(svm, vmcb12); ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, @@ -1877,7 +1876,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, nested_copy_vmcb_control_to_cache(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); + nested_vmcb02_prepare_control(svm); /* * Any previously restored state (e.g. 
KVM_SET_SREGS) would mark fields diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 055d3f64d06be..7174f939cb848 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3558,6 +3558,16 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return svm_invoke_exit_handler(vcpu, exit_code); } +static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->soft_int_csbase = svm->vmcb->save.cs.base; + svm->soft_int_old_rip = kvm_rip_read(vcpu); + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->soft_int_next_rip = kvm_rip_read(vcpu); +} + static int pre_svm_run(struct kvm_vcpu *vcpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); @@ -3680,6 +3690,13 @@ static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_NRIPS) && !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) svm->vmcb->control.next_rip = kvm_rip_read(vcpu); + + /* + * Simiarly, initialize the soft int metadata here to use the most + * up-to-date values of RIP and CS base, regardless of restore order. + */ + if (svm->soft_int_injected) + svm_set_nested_run_soft_int_state(vcpu); } void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, @@ -4042,6 +4059,18 @@ static void svm_complete_soft_interrupt(struct kvm_vcpu *vcpu, u8 vector, bool is_soft = (type == SVM_EXITINTINFO_TYPE_SOFT); struct vcpu_svm *svm = to_svm(vcpu); + /* + * Initialize the soft int fields *before* reading them below if KVM + * aborted entry to the guest with a nested VMRUN pending. To ensure + * KVM uses up-to-date values for RIP and CS base across save/restore, + * regardless of restore order, KVM waits to set the soft int fields + * until VMRUN is imminent. But when canceling injection, KVM requeues + * the soft int and will reinject it via the standard injection flow, + * and so KVM needs to grab the state from the pending nested VMRUN. 
+ */ + if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) + svm_set_nested_run_soft_int_state(vcpu); + /* * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's * associated with the original soft exception/interrupt. next_rip is From 18ff4ab16412ecb9b1d5f8799610a8e1b110ede0 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:33:56 +0000 Subject: [PATCH 183/554] KVM: SVM: Switch svm_copy_lbrs() to a macro commit 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 upstream. In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area' without a containing 'struct vmcb', and later even 'struct vmcb_save_area_cached', make it a macro. Macros are generally not preferred compared to functions, mainly due to type-safety. However, in this case it seems like having a simple macro copying a few fields is better than copy-pasting the same 5 lines of code in different places. Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-3-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 8 ++++---- arch/x86/kvm/svm/svm.c | 9 --------- arch/x86/kvm/svm/svm.h | 10 +++++++++- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index f382609474c03..f208b0dec2f00 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -679,10 +679,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 * Reserved bits of DEBUGCTL are ignored. Be consistent with * svm_set_msr's definition of reserved bits. 
*/ - svm_copy_lbrs(vmcb02, vmcb12); + svm_copy_lbrs(&vmcb02->save, &vmcb12->save); vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; } else { - svm_copy_lbrs(vmcb02, vmcb01); + svm_copy_lbrs(&vmcb02->save, &vmcb01->save); } vmcb_mark_dirty(vmcb02, VMCB_LBR); svm_update_lbrv(&svm->vcpu); @@ -1189,9 +1189,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { - svm_copy_lbrs(vmcb12, vmcb02); + svm_copy_lbrs(&vmcb12->save, &vmcb02->save); } else { - svm_copy_lbrs(vmcb01, vmcb02); + svm_copy_lbrs(&vmcb01->save, &vmcb02->save); vmcb_mark_dirty(vmcb01, VMCB_LBR); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7174f939cb848..addb9827c3d59 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -804,15 +804,6 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu) */ } -void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) -{ - to_vmcb->save.dbgctl = from_vmcb->save.dbgctl; - to_vmcb->save.br_from = from_vmcb->save.br_from; - to_vmcb->save.br_to = from_vmcb->save.br_to; - to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from; - to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to; -} - static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e66a16e59b1a5..b713efe65dc90 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -688,8 +688,16 @@ static inline void *svm_vcpu_alloc_msrpm(void) return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT); } +#define svm_copy_lbrs(to, from) \ +do { \ + (to)->dbgctl = (from)->dbgctl; \ + (to)->br_from = (from)->br_from; \ + (to)->br_to = (from)->br_to; \ + (to)->last_excp_from = (from)->last_excp_from; \ + (to)->last_excp_to = (from)->last_excp_to; \ +} while (0) + void svm_vcpu_free_msrpm(void *msrpm); -void 
svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb); void svm_enable_lbrv(struct kvm_vcpu *vcpu); void svm_update_lbrv(struct kvm_vcpu *vcpu); From 2b922a42b531a82d7881add14a7698dcdc5e1f0a Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:33:57 +0000 Subject: [PATCH 184/554] KVM: SVM: Add missing save/restore handling of LBR MSRs commit 3700f0788da6acf73b2df56690f4b201aa4aefd2 upstream. MSR_IA32_DEBUGCTLMSR and LBR MSRs are currently not enumerated by KVM_GET_MSR_INDEX_LIST, and LBR MSRs cannot be set with KVM_SET_MSRS. So save/restore is completely broken. Fix it by adding the MSRs to msrs_to_save_base, and allowing writes to LBR MSRs from userspace only (as they are read-only MSRs) if LBR virtualization is enabled. Additionally, to correctly restore L1's LBRs while L2 is running, make sure the LBRs are copied from the captured VMCB01 save area in svm_copy_vmrun_state(). Note, for VMX, this also fixes a flaw where MSR_IA32_DEBUGCTLMSR isn't reported as an MSR to save/restore. Note #2, over-reporting MSR_IA32_LASTxxx on Intel is ok, as KVM already handles unsupported reads and writes thanks to commit b5e2fec0ebc3 ("KVM: Ignore DEBUGCTL MSRs with no effect") (kvm_do_msr_access() will morph the unsupported userspace write into a nop). 
Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization") Cc: stable@vger.kernel.org Reported-by: Jim Mattson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-4-yosry@kernel.org [sean: guard with lbrv checks, massage changelog] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 5 +++++ arch/x86/kvm/svm/svm.c | 42 ++++++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index f208b0dec2f00..2c3a41305317d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1057,6 +1057,11 @@ void svm_copy_vmrun_state(struct vmcb_save_area *to_save, to_save->isst_addr = from_save->isst_addr; to_save->ssp = from_save->ssp; } + + if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) { + svm_copy_lbrs(to_save, from_save); + to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS; + } } void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index addb9827c3d59..35e24f8f060ad 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2712,19 +2712,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = svm->vmcb->save.dbgctl; + msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0; break; case MSR_IA32_LASTBRANCHFROMIP: - msr_info->data = svm->vmcb->save.br_from; + msr_info->data = lbrv ? svm->vmcb->save.br_from : 0; break; case MSR_IA32_LASTBRANCHTOIP: - msr_info->data = svm->vmcb->save.br_to; + msr_info->data = lbrv ? svm->vmcb->save.br_to : 0; break; case MSR_IA32_LASTINTFROMIP: - msr_info->data = svm->vmcb->save.last_excp_from; + msr_info->data = lbrv ? 
svm->vmcb->save.last_excp_from : 0; break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm->vmcb->save.last_excp_to; + msr_info->data = lbrv ? svm->vmcb->save.last_excp_to : 0; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -2999,6 +2999,38 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; + case MSR_IA32_LASTBRANCHFROMIP: + if (!lbrv) + return KVM_MSR_RET_UNSUPPORTED; + if (!msr->host_initiated) + return 1; + svm->vmcb->save.br_from = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); + break; + case MSR_IA32_LASTBRANCHTOIP: + if (!lbrv) + return KVM_MSR_RET_UNSUPPORTED; + if (!msr->host_initiated) + return 1; + svm->vmcb->save.br_to = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); + break; + case MSR_IA32_LASTINTFROMIP: + if (!lbrv) + return KVM_MSR_RET_UNSUPPORTED; + if (!msr->host_initiated) + return 1; + svm->vmcb->save.last_excp_from = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); + break; + case MSR_IA32_LASTINTTOIP: + if (!lbrv) + return KVM_MSR_RET_UNSUPPORTED; + if (!msr->host_initiated) + return 1; + svm->vmcb->save.last_excp_to = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); + break; case MSR_VM_HSAVE_PA: /* * Old kernels did not validate the value written to diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24ed813952880..ad2b7158b9c8e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -350,6 +350,9 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_U_CET, MSR_IA32_S_CET, MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB, + MSR_IA32_DEBUGCTLMSR, + MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP, + MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP, }; static const u32 msrs_to_save_pmu[] = { From 49c8b2395bacd552051a9bfacbfe6a60d396fdd8 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:33:59 +0000 Subject: [PATCH 185/554] KVM: nSVM: Always inject a #GP if mapping 
VMCB12 fails on nested VMRUN commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream. nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails with -EINVAL. But it could also fail with -EFAULT if creating a host mapping failed. Inject a #GP in all cases, no reason to treat failure modes differently. Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory") CC: stable@vger.kernel.org Co-developed-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 2c3a41305317d..77c8cc083253c 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -966,12 +966,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) } vmcb12_gpa = svm->vmcb->save.rax; - ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map); - if (ret == -EINVAL) { + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) { kvm_inject_gp(vcpu, 0); return 1; - } else if (ret) { - return kvm_skip_emulated_instruction(vcpu); } ret = kvm_skip_emulated_instruction(vcpu); From 72fe7d04a6698da23e73338883e43153024b95c5 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:00 +0000 Subject: [PATCH 186/554] KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper commit 290c8d82023ab0e1d2782d37136541e017174d7c upstream. Refactor the vCPU cap and vmcb12 flag checks into a helper. The unlikely() annotation is dropped, it's unlikely (huh) to make a difference and the CPU will probably predict it better on its own. 
CC: stable@vger.kernel.org Co-developed-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-7-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 77c8cc083253c..984f444f280e8 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -608,6 +608,12 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm) svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat; } +static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu) +{ + return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && + (to_svm(vcpu)->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); +} + static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12) { bool new_vmcb12 = false; @@ -673,8 +679,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DR); } - if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (nested_vmcb12_has_lbrv(vcpu)) { /* * Reserved bits of DEBUGCTL are ignored. Be consistent with * svm_set_msr's definition of reserved bits. 
@@ -1189,8 +1194,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_exit_on_intr(svm)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (nested_vmcb12_has_lbrv(vcpu)) { svm_copy_lbrs(&vmcb12->save, &vmcb02->save); } else { svm_copy_lbrs(&vmcb01->save, &vmcb02->save); From 410ea5575c6675fe6aeac8ff89841ba7d4ef5a66 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:01 +0000 Subject: [PATCH 187/554] KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as a helper commit dcf3648ab71437b504abbfdc4e74622a0f1a56e3 upstream. Move mapping vmcb12 and updating it out of nested_svm_vmexit() into a helper, no functional change intended. CC: stable@vger.kernel.org Co-developed-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-8-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 77 ++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 984f444f280e8..8ed06271ad1bb 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1082,36 +1082,20 @@ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; } -int nested_svm_vmexit(struct vcpu_svm *svm) +static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; - struct vmcb *vmcb01 = svm->vmcb01.ptr; + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; - struct vmcb *vmcb12; struct kvm_host_map map; + struct vmcb *vmcb12; int rc; rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); - if (rc) { - if (rc == -EINVAL) - kvm_inject_gp(vcpu, 0); - return 1; - } + if (rc) + return rc; vmcb12 
= map.hva; - /* Exit Guest-Mode */ - leave_guest_mode(vcpu); - svm->nested.vmcb12_gpa = 0; - WARN_ON_ONCE(svm->nested.nested_run_pending); - - kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); - - /* in case we halted in L2 */ - kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); - - /* Give the current vmcb to the guest */ - vmcb12->save.es = vmcb02->save.es; vmcb12->save.cs = vmcb02->save.cs; vmcb12->save.ss = vmcb02->save.ss; @@ -1149,10 +1133,48 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) vmcb12->control.next_rip = vmcb02->control.next_rip; + if (nested_vmcb12_has_lbrv(vcpu)) + svm_copy_lbrs(&vmcb12->save, &vmcb02->save); + vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; + trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, + vmcb12->control.exit_info_1, + vmcb12->control.exit_info_2, + vmcb12->control.exit_int_info, + vmcb12->control.exit_int_info_err, + KVM_ISA_SVM); + + kvm_vcpu_unmap(vcpu, &map); + return 0; +} + +int nested_svm_vmexit(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct vmcb *vmcb01 = svm->vmcb01.ptr; + struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + int rc; + + rc = nested_svm_vmexit_update_vmcb12(vcpu); + if (rc) { + if (rc == -EINVAL) + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Exit Guest-Mode */ + leave_guest_mode(vcpu); + svm->nested.vmcb12_gpa = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); + + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + + /* in case we halted in L2 */ + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); @@ -1194,9 +1216,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_exit_on_intr(svm)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - if 
(nested_vmcb12_has_lbrv(vcpu)) { - svm_copy_lbrs(&vmcb12->save, &vmcb02->save); - } else { + if (!nested_vmcb12_has_lbrv(vcpu)) { svm_copy_lbrs(&vmcb01->save, &vmcb02->save); vmcb_mark_dirty(vmcb01, VMCB_LBR); } @@ -1252,15 +1272,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vcpu.arch.dr7 = DR7_FIXED_1; kvm_update_dr7(&svm->vcpu); - trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, - vmcb12->control.exit_info_1, - vmcb12->control.exit_info_2, - vmcb12->control.exit_int_info, - vmcb12->control.exit_int_info_err, - KVM_ISA_SVM); - - kvm_vcpu_unmap(vcpu, &map); - nested_svm_transition_tlb_flush(vcpu); nested_svm_uninit_mmu_context(vcpu); From d6f6371bbea6d17cb947ddeb14348e2dd5f8f09f Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:02 +0000 Subject: [PATCH 188/554] KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT commit 1b30e7551767cb95b3e49bb169c72bbd76b56e05 upstream. KVM currently injects a #GP and hopes for the best if mapping VMCB12 fails on nested #VMEXIT, and only if the failure mode is -EINVAL. Mapping the VMCB12 could also fail if creating host mappings fails. After the #GP is injected, nested_svm_vmexit() bails early, without cleaning up (e.g. KVM_REQ_GET_NESTED_STATE_PAGES is set, is_guest_mode() is true, etc). Instead of optionally injecting a #GP, triple fault the guest if mapping VMCB12 fails since KVM cannot make a sane recovery. The APM states that a #VMEXIT will triple fault if host state is illegal or an exception occurs while loading host state, so the behavior is not entirely made up. Do not return early from nested_svm_vmexit(), continue cleaning up the vCPU state (e.g. switch back to vmcb01), to handle the failure as gracefully as possible. 
Fixes: cf74a78b229d ("KVM: SVM: Add VMEXIT handler and intercepts") CC: stable@vger.kernel.org Co-developed-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-9-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8ed06271ad1bb..fa012c0564227 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1158,12 +1158,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; int rc; - rc = nested_svm_vmexit_update_vmcb12(vcpu); - if (rc) { - if (rc == -EINVAL) - kvm_inject_gp(vcpu, 0); - return 1; - } + if (nested_svm_vmexit_update_vmcb12(vcpu)) + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); /* Exit Guest-Mode */ leave_guest_mode(vcpu); From c2fad967e652c307f4ddcc9e851dc72eda665393 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:04 +0000 Subject: [PATCH 189/554] KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID) commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream. According to the APM, GIF is set to 0 on any #VMEXIT, including an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on consistency check failures. 
Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index fa012c0564227..f9919061da716 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -992,6 +992,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; + svm_set_gif(svm, false); goto out; } From b6656ba2652595ee88ca4ed92ec015206f7d9eb9 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:05 +0000 Subject: [PATCH 190/554] KVM: nSVM: Clear EVENTINJ fields in vmcb12 on nested #VMEXIT commit 69b721a86d0dcb026f6db7d111dcde7550442d2e upstream. According to the APM, from the reference of the VMRUN instruction: Upon #VMEXIT, the processor performs the following actions in order to return to the host execution context: ... clear EVENTINJ field in VMCB KVM already syncs EVENTINJ fields from vmcb02 to cached vmcb12 on every L2->L0 #VMEXIT. Since these fields are zeroed by the CPU on #VMEXIT, they will mostly be zeroed in vmcb12 on nested #VMEXIT by nested_svm_vmexit(). However, this is not the case when: 1. Consistency checks fail, as nested_svm_vmexit() is not called. 2. Entering guest mode fails before L2 runs (e.g. due to failed load of CR3). (2) was broken by commit 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit"), as prior to that nested_svm_vmexit() always zeroed EVENTINJ fields. Explicitly clear the fields in all nested #VMEXIT code paths. 
Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") Fixes: 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-12-yosry@kernel.org [sean: massage changelog formatting] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index f9919061da716..12e65501b27e3 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -992,6 +992,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; + vmcb12->control.event_inj = 0; + vmcb12->control.event_inj_err = 0; svm_set_gif(svm, false); goto out; } @@ -1137,9 +1139,9 @@ static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu) if (nested_vmcb12_has_lbrv(vcpu)) svm_copy_lbrs(&vmcb12->save, &vmcb02->save); + vmcb12->control.event_inj = 0; + vmcb12->control.event_inj_err = 0; vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; - vmcb12->control.event_inj = svm->nested.ctl.event_inj; - vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, vmcb12->control.exit_info_1, From d79720fc99594e2ec8c7421d45ca64624e89d25c Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:06 +0000 Subject: [PATCH 191/554] KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream. KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to failed VMRUN. However, they are not explicitly cleared in other synthesized #VMEXITs. 
soft_int_injected is always cleared after the first VMRUN of L2 when completing interrupts, as any re-injection is then tracked by KVM (instead of purely in vmcb02). nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection failed, as KVM still needs to keep track that the NMI originated from L1 to avoid blocking NMIs for L1. It is only cleared when the NMI injection succeeds. KVM could synthesize a #VMEXIT to L1 before successfully injecting the NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask NMIs and intercept IRET when injecting an NMI into L1. Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e. for all #VMEXITs except those that occur due to failed consistency checks, as those happen before nmi_l1_to_l2 or soft_int_injected are set. Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 12e65501b27e3..0bfbc9378b23b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1021,8 +1021,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) out_exit_err: svm->nested.nested_run_pending = 0; - svm->nmi_l1_to_l2 = false; - svm->soft_int_injected = false; svm->vmcb->control.exit_code = SVM_EXIT_ERR; svm->vmcb->control.exit_code_hi = -1u; @@ -1279,6 +1277,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (rc) return 1; + /* Drop tracking for L1->L2 injected NMIs and soft IRQs */ + svm->nmi_l1_to_l2 = false; + svm->soft_int_injected = false; + /* * Drop what we picked up for L2 via svm_complete_interrupts() so it * doesn't end up in L1. 
From aabd785f9565bcc3c9651d48cf46a8f43bc46f35 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:10 +0000 Subject: [PATCH 192/554] KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream. According to the APM Volume #2, 15.5, Canonicalization and Consistency Checks (24593—Rev. 3.42—March 2024), the following condition (among others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR): EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero. In the list of consistency checks done when EFER.LME and CR0.PG are set, add a check that CS.L and CS.D are not both set, after the existing check that CR4.PAE is set. This is functionally a nop because the nested VMRUN results in SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM makes all consistency checks before a VMRUN is actually attempted. Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 6 ++++++ arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 7 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 0bfbc9378b23b..75e63fc4616ab 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -377,6 +377,10 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu, CC(!(save->cr0 & X86_CR0_PE)) || CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3))) return false; + + if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) && + (save->cs.attrib & SVM_SELECTOR_DB_MASK))) + return false; } /* Note, SVM doesn't have any additional restrictions on CR4. 
*/ @@ -465,6 +469,8 @@ static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to, * Copy only fields that are validated, as we need them * to avoid TOC/TOU races. */ + to->cs = from->cs; + to->efer = from->efer; to->cr0 = from->cr0; to->cr3 = from->cr3; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b713efe65dc90..54e30b08801fc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -142,6 +142,7 @@ struct kvm_vmcb_info { }; struct vmcb_save_area_cached { + struct vmcb_seg cs; u64 efer; u64 cr4; u64 cr3; From 0d1f3fd2664b22f95f8589bd583b929a110cd013 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:08 +0000 Subject: [PATCH 193/554] KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE commit e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 upstream. KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs. On first glance, it seems like the check should actually be for guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the host to support NPTs but the guest CPUID to not advertise it. However, the consistency check is not architectural to begin with. The APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored if X86_FEATURE_NPT is not available for L1, so sanitize it when copying from the VMCB12 to KVM's cache. Apart from the consistency check, NP_ENABLE in VMCB12 is currently ignored because the bit is actually copied from VMCB01 to VMCB02, not from VMCB12.
Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-15-yosry@kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 75e63fc4616ab..759ce6c24879d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -335,9 +335,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, if (CC(control->asid == 0)) return false; - if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled)) - return false; - if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa, MSRPM_SIZE))) return false; @@ -419,6 +416,11 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, for (i = 0; i < MAX_INTERCEPT; i++) to->intercepts[i] = from->intercepts[i]; + /* Always clear SVM_NESTED_CTL_NP_ENABLE if the guest cannot use NPTs */ + to->nested_ctl = from->nested_ctl; + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT)) + to->nested_ctl &= ~SVM_NESTED_CTL_NP_ENABLE; + to->iopm_base_pa = from->iopm_base_pa; to->msrpm_base_pa = from->msrpm_base_pa; to->tsc_offset = from->tsc_offset; @@ -432,7 +434,6 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, to->exit_info_2 = from->exit_info_2; to->exit_int_info = from->exit_int_info; to->exit_int_info_err = from->exit_int_info_err; - to->nested_ctl = from->nested_ctl; to->event_inj = from->event_inj; to->event_inj_err = from->event_inj_err; to->next_rip = from->next_rip; From 83f7e055c138b7a30d4c04af1812f7a88813aaa6 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 3 Mar 2026 00:34:09 +0000 Subject: [PATCH 194/554] KVM: nSVM: Add missing consistency check for nCR3 validity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream. From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024): When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the following conditions are considered illegal state combinations, in addition to those mentioned in “Canonicalization and Consistency Checks”: • Any MBZ bit of nCR3 is set. • Any G_PAT.PA field has an unsupported type encoding or any reserved field in G_PAT has a nonzero value. Add the consistency check for nCR3 being a legal GPA with no MBZ bits set. Note, the G_PAT.PA check is being handled separately[*]. Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*] Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org [sean: capture everything in CC(), massage changelog formatting] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 759ce6c24879d..1105c62c2103e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -335,6 +335,10 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, if (CC(control->asid == 0)) return false; + if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && + !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3))) + return false; + if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa, MSRPM_SIZE))) return false; From 009c0f726abeaa67aad1d96b883bdce01d405ce2 Mon Sep 17 00:00:00 2001 From: Kevin Cheng Date: Tue, 3 Mar 2026 16:22:22 -0800 Subject: [PATCH 195/554] KVM: nSVM: Raise #UD if unhandled VMMCALL isn't intercepted by L1 commit c36991c6f8d2ab56ee67aff04e3c357f45cfc76c upstream. 
Explicitly synthesize a #UD for VMMCALL if L2 is active, L1 does NOT want to intercept VMMCALL, nested_svm_l2_tlb_flush_enabled() is true, and the hypercall is something other than one of the supported Hyper-V hypercalls. When all of the above conditions are met, KVM will intercept VMMCALL but never forward it to L1, i.e. will let L2 make hypercalls as if it were L1. The TLFS says a whole lot of nothing about this scenario, so go with the architectural behavior, which says that VMMCALL #UDs if it's not intercepted. Opportunistically do a 2-for-1 stub trade by stub-ifying the new API instead of the helpers it uses. The last remaining "single" stub will soon be dropped as well. Suggested-by: Sean Christopherson Fixes: 3f4a812edf5c ("KVM: nSVM: hyper-v: Enable L2 TLB flush") Cc: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Kevin Cheng Link: https://patch.msgid.link/20260228033328.2285047-5-chengkev@google.com [sean: rewrite changelog and comment, tag for stable, remove defunct stubs] Reviewed-by: Yosry Ahmed Reviewed-by: Vitaly Kuznetsov Link: https://patch.msgid.link/20260304002223.1105129-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.h | 8 -------- arch/x86/kvm/svm/hyperv.h | 11 +++++++++++ arch/x86/kvm/svm/nested.c | 4 +--- arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 6ce160ffa6786..6301f79fcbae7 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) { return false; } -static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) -{ - return false; -} -static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu) -{ - return false; -} static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu) { return 0; diff --git a/arch/x86/kvm/svm/hyperv.h 
b/arch/x86/kvm/svm/hyperv.h index d3f8bfc05832e..9af03970d40c2 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; } +static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) +{ + return guest_hv_cpuid_has_l2_tlb_flush(vcpu) && + nested_svm_l2_tlb_flush_enabled(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu); +} + void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #else /* CONFIG_KVM_HYPERV */ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {} @@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) { return false; } +static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) +{ + return false; +} static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {} #endif /* CONFIG_KVM_HYPERV */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1105c62c2103e..bb250449fd414 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1672,9 +1672,7 @@ int nested_svm_exit_special(struct vcpu_svm *svm) } case SVM_EXIT_VMMCALL: /* Hyper-V L2 TLB flush hypercall is handled by L0 */ - if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) && - nested_svm_l2_tlb_flush_enabled(vcpu) && - kvm_hv_is_tlb_flush_hcall(vcpu)) + if (nested_svm_is_l2_tlb_flush_hcall(vcpu)) return NESTED_EXIT_HOST; break; default: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 35e24f8f060ad..7795e4777da7e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -52,6 +52,7 @@ #include "svm.h" #include "svm_ops.h" +#include "hyperv.h" #include "kvm_onhyperv.h" #include "svm_onhyperv.h" @@ -3177,6 +3178,22 @@ static int bus_lock_exit(struct kvm_vcpu *vcpu) return 0; } +static int vmmcall_interception(struct kvm_vcpu *vcpu) +{ + /* + 
* Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB + * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is + * reachable if and only if L1 doesn't want to intercept VMMCALL or has + * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls. + */ + if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + return kvm_emulate_hypercall(vcpu); +} + static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, @@ -3227,7 +3244,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, [SVM_EXIT_VMRUN] = vmrun_interception, - [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall, + [SVM_EXIT_VMMCALL] = vmmcall_interception, [SVM_EXIT_VMLOAD] = vmload_interception, [SVM_EXIT_VMSAVE] = vmsave_interception, [SVM_EXIT_STGI] = stgi_interception, From 5955e053ff001702257bce630f3c41153812aefb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 3 Mar 2026 16:22:23 -0800 Subject: [PATCH 196/554] KVM: nSVM: Always intercept VMMCALL when L2 is active commit 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 upstream. Always intercept VMMCALL now that KVM properly synthesizes a #UD as appropriate, i.e. when L1 doesn't want to intercept VMMCALL, to avoid putting L2 into an infinite #UD loop if KVM_X86_QUIRK_FIX_HYPERCALL_INSN is enabled. By letting L2 execute VMMCALL natively and thus #UD, for all intents and purposes KVM morphs the VMMCALL intercept into a #UD intercept (KVM always intercepts #UD). When the hypercall quirk is enabled, KVM "emulates" VMMCALL in response to the #UD by trying to fixup the opcode to the "right" vendor, then restarts the guest, without skipping the VMMCALL. 
As a result, the guest sees an endless stream of #UDs since it's already executing the correct vendor hypercall instruction, i.e. the emulator doesn't anticipate that the #UD could be due to lack of interception, as opposed to a truly undefined opcode. Fixes: 0d945bd93511 ("KVM: SVM: Don't allow nested guest to VMMCALL into host") Cc: stable@vger.kernel.org Reviewed-by: Yosry Ahmed Reviewed-by: Vitaly Kuznetsov Link: https://patch.msgid.link/20260304002223.1105129-3-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/hyperv.h | 4 ---- arch/x86/kvm/svm/nested.c | 7 ------- 2 files changed, 11 deletions(-) diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index 9af03970d40c2..f70d076911a63 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -51,10 +51,6 @@ static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #else /* CONFIG_KVM_HYPERV */ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {} -static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) -{ - return false; -} static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bb250449fd414..17c6ff201fa1d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -159,13 +159,6 @@ void recalc_intercepts(struct vcpu_svm *svm) vmcb_clr_intercept(c, INTERCEPT_VINTR); } - /* - * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB - * flush feature is enabled. 
- */ - if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu)) - vmcb_clr_intercept(c, INTERCEPT_VMMCALL); - for (i = 0; i < MAX_INTERCEPT; i++) c->intercepts[i] |= g->intercepts[i]; From cd2689d625b01165442d181a9b2eb18c41baf5f8 Mon Sep 17 00:00:00 2001 From: Rong Bao Date: Fri, 1 May 2026 20:22:05 +0800 Subject: [PATCH 197/554] perf annotate: Use jump__delete when freeing LoongArch jumps [ Upstream commit a355eefc36c4481188249b067832b40a2c45fa5c ] Currently, the initialization of loongarch_jump_ops does not contain an assignment to its .free field. This causes disasm_line__free() to fall through to ins_ops__delete() for LoongArch jump instructions. ins_ops__delete() will free ins_operands.source.raw and ins_operands.source.name, and these fields overlaps with ins_operands.jump.raw_comment and ins_operands.jump.raw_func_start. Since in loongarch_jump__parse(), these two fields are populated by strchr()-ing the same buffer, trying to free them will lead to undefined behavior. This invalid free usually leads to crashes: Process 1712902 (perf) of user 1000 dumped core. 
Stack trace of thread 1712902: #0 0x00007fffef155c58 n/a (libc.so.6 + 0x95c58) #1 0x00007fffef0f7a94 raise (libc.so.6 + 0x37a94) #2 0x00007fffef0dd6a8 abort (libc.so.6 + 0x1d6a8) #3 0x00007fffef145490 n/a (libc.so.6 + 0x85490) #4 0x00007fffef1646f4 n/a (libc.so.6 + 0xa46f4) #5 0x00007fffef164718 n/a (libc.so.6 + 0xa4718) #6 0x00005555583a6764 __zfree (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x106764) #7 0x000055555854fb70 disasm_line__free (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x2afb70) #8 0x000055555853d618 annotated_source__purge (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x29d618) #9 0x000055555852300c __hist_entry__tui_annotate (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x28300c) #10 0x0000555558526718 do_annotate (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x286718) #11 0x000055555852ed94 evsel__hists_browse (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x28ed94) #12 0x000055555831fdd0 cmd_report (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x7fdd0) #13 0x000055555839b644 handle_internal_command (/home/csmantle/dist/linux-arch/tools/perf/perf + 0xfb644) #14 0x00005555582fe6ac main (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x5e6ac) #15 0x00007fffef0ddd90 n/a (libc.so.6 + 0x1dd90) #16 0x00007fffef0ddf0c __libc_start_main (libc.so.6 + 0x1df0c) #17 0x00005555582fed10 _start (/home/csmantle/dist/linux-arch/tools/perf/perf + 0x5ed10) ELF object binary architecture: LoongArch ... 
and it can be confirmed with Valgrind: ==1721834== Invalid free() / delete / delete[] / realloc() ==1721834== at 0x4EA9014: free (in /usr/lib/valgrind/vgpreload_memcheck-loongarch64-linux.so) ==1721834== by 0x4106287: __zfree (zalloc.c:13) ==1721834== by 0x42ADC8F: disasm_line__free (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x429B737: annotated_source__purge (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42811EB: __hist_entry__tui_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42848D7: do_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x428CF33: evsel__hists_browse (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== Address 0x7d34303 is 35 bytes inside a block of size 62 alloc'd ==1721834== at 0x4EA59B8: malloc (in /usr/lib/valgrind/vgpreload_memcheck-loongarch64-linux.so) ==1721834== by 0x6B80B6F: strdup (strdup.c:42) ==1721834== by 0x42AD917: disasm_line__new (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42AE5A3: symbol__disassemble_objdump (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42AF0A7: symbol__disassemble (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x429B3CF: symbol__annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x429C233: symbol__annotate2 (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42804D3: __hist_entry__tui_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x42848D7: do_annotate (in /home/csmantle/dist/linux-arch/tools/perf/perf) ==1721834== by 0x428CF33: evsel__hists_browse (in /home/csmantle/dist/linux-arch/tools/perf/perf) This patch adds the missing free() specialization in loongarch_jump_ops, which prevents disasm_line__free() from invoking the default cleanup function. 
Fixes: fb7fd2a14a503b9a ("perf annotate: Move raw_comment and raw_func_start fields out of 'struct ins_operands'") Cc: stable@vger.kernel.org Cc: WANG Rui Cc: Huacai Chen Cc: WANG Xuerui Cc: loongarch@lists.linux.dev Signed-off-by: Rong Bao Tested-by: WANG Rui Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin --- tools/perf/arch/loongarch/annotate/instructions.c | 1 + tools/perf/util/disasm.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index 1c3abb43c8d72..1a0a1dacebc30 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -97,6 +97,7 @@ static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, st } static struct ins_ops loongarch_jump_ops = { + .free = jump__delete, .parse = loongarch_jump__parse, .scnprintf = jump__scnprintf, }; diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index b1be847446fea..c513db41137fa 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -47,6 +47,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name); static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name); +static void jump__delete(struct ins_operands *ops); static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); From f9c279ffee6b13ff87d24cef33d401e39fb20eb8 Mon Sep 17 00:00:00 2001 From: Brian Ruley Date: Wed, 15 Apr 2026 18:12:48 +0100 Subject: [PATCH 198/554] ARM: 9472/1: fix race condition on PG_dcache_clean in __sync_icache_dcache() commit 75f9a484e817adea211c73f89ed938a2b2f90953 upstream. This bug was already discovered and fixed for arm64 in commit 588a513d3425 ("arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache()"). 
Verified with added instrumentation to track dcache flushes in a ring buffer, as shown by the (distilled) output: kernel: SIGILL at b6b80ac0 cpu 1 pid 32663 linux_pte=8eff659f hw_pte=8eff6e7e young=1 exec=1 kernel: dcache flush START cpu0 pfn=8eff6 ts=48629557020154 kernel: dcache flush SKIPPED cpu1 pfn=8eff6 ts=48629557020154 kernel: dcache flush FINISH cpu0 pfn=8eff6 ts=48629557036154 audisp-syslog: comm="journalctl" exe="/usr/bin/journalctl" sig=4 [...] Discussions in the mailing list mentioned that arch/arm is also affected but the fix was never applied to it [1][2]. Apply the change now, since the race condition can cause sporadic SIGILL's and SEGV's especially while under high memory pressure. Link: https://lore.kernel.org/all/adzMOdySgMIePcue@willie-the-truck [1] Link: https://lore.kernel.org/all/20210514095001.13236-1-catalin.marinas@arm.com [2] Signed-off-by: Brian Ruley Reviewed-by: Will Deacon Cc: Fixes: 6012191aa9c6 ("ARM: 6380/1: Introduce __sync_icache_dcache() for VIPT caches") Signed-off-by: Will Deacon Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/flush.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 19470d938b236..4d7ef5cc36b66 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -304,8 +304,10 @@ void __sync_icache_dcache(pte_t pteval) else mapping = NULL; - if (!test_and_set_bit(PG_dcache_clean, &folio->flags.f)) + if (!test_bit(PG_dcache_clean, &folio->flags.f)) { __flush_dcache_folio(mapping, folio); + set_bit(PG_dcache_clean, &folio->flags.f); + } if (pte_exec(pteval)) __flush_icache_all(); From 6b4bf6519e5071492f7bcd3e237dceb57457b8b6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 24 Apr 2026 15:52:10 +0900 Subject: [PATCH 199/554] ring-buffer: Do not double count the reader_page commit 92d5a606721f759ebebf448b3bd2b7a781d50bd0 upstream. 
The cpu_buffer->reader_page is updated if there are unwound pages. After that update, we should skip the page if it is the original reader_page, because the original reader_page is already checked. Cc: stable@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Mathieu Desnoyers Cc: Ian Rogers Link: https://patch.msgid.link/177701353063.2223789.1471163147644103306.stgit@mhiramat.tok.corp.google.com Fixes: ca296d32ece3 ("tracing: ring_buffer: Rewind persistent ring buffer on reboot") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 656cd8df2d55d..a31fb4b7a52ea 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1877,7 +1877,7 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu) static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; - struct buffer_page *head_page, *orig_head; + struct buffer_page *head_page, *orig_head, *orig_reader; unsigned long entry_bytes = 0; unsigned long entries = 0; int ret; @@ -1888,16 +1888,17 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) return; orig_head = head_page = cpu_buffer->head_page; + orig_reader = cpu_buffer->reader_page; /* Do the reader page first */ - ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu); + ret = rb_validate_buffer(orig_reader->page, cpu_buffer->cpu); if (ret < 0) { pr_info("Ring buffer reader page is invalid\n"); goto invalid; } entries += ret; - entry_bytes += local_read(&cpu_buffer->reader_page->page->commit); - local_set(&cpu_buffer->reader_page->entries, ret); + entry_bytes += local_read(&orig_reader->page->commit); + local_set(&orig_reader->entries, ret); ts = head_page->page->time_stamp; @@ 
-2000,8 +2001,8 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) /* Iterate until finding the commit page */ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) { - /* Reader page has already been done */ - if (head_page == cpu_buffer->reader_page) + /* The original reader page has already been checked/counted. */ + if (head_page == orig_reader) continue; ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu); From 537e065977022aa22f2c2503e8accaf16622e0fd Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 28 Mar 2026 20:30:38 +0530 Subject: [PATCH 200/554] ext4: fix bounds check in check_xattrs() to prevent out-of-bounds access commit eceafc31ea7b42c984ece10d79d505c0bb6615d5 upstream. The bounds check for the next xattr entry in check_xattrs() uses (void *)next >= end, which allows next to point within sizeof(u32) bytes of end. On the next loop iteration, IS_LAST_ENTRY() reads 4 bytes via *(__u32 *)(entry), which can overrun the valid xattr region. For example, if next lands at end - 1, the check passes since next < end, but IS_LAST_ENTRY() reads 4 bytes starting at end - 1, accessing 3 bytes beyond the valid region. Fix this by changing the check to (void *)next + sizeof(u32) > end, ensuring there is always enough space for the IS_LAST_ENTRY() read on the subsequent iteration. 
Fixes: 3478c83cf26b ("ext4: improve xattr consistency checking and error reporting") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20260224231429.31361-1-kartikey406@gmail.com/T/ [v1] Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260328150038.349497-1-kartikey406@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4ed8ddf2a60b3..6ecdcb389e1b0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -226,7 +226,7 @@ check_xattrs(struct inode *inode, struct buffer_head *bh, /* Find the end of the names list */ while (!IS_LAST_ENTRY(e)) { struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); - if ((void *)next >= end) { + if ((void *)next + sizeof(u32) > end) { err_str = "e_name out of bounds"; goto errout; } From 1e6b0a69bf2c9c819255c7566e4355536d81d9cf Mon Sep 17 00:00:00 2001 From: Sohei Koyama Date: Mon, 6 Apr 2026 16:48:30 +0900 Subject: [PATCH 201/554] ext4: fix missing brelse() in ext4_xattr_inode_dec_ref_all() commit 77d059519382bd66283e6a4e83ee186e87e7708f upstream. The commit c8e008b60492 ("ext4: ignore xattrs past end") introduced a refcount leak in ext4_xattr_inode_dec_ref_all() when block_csum is false. ext4_xattr_inode_dec_ref_all() calls ext4_get_inode_loc() to get iloc.bh, but never releases it with brelse(). 
Fixes: c8e008b60492 ("ext4: ignore xattrs past end") Signed-off-by: Sohei Koyama Reviewed-by: Andreas Dilger Reviewed-by: Ritesh Harjani (IBM) Cc: stable@vger.kernel.org Reviewed-by: Zhang Yi Reviewed-by: Baokun Li Link: https://patch.msgid.link/20260406074830.8480-1-skoyama@ddn.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 6ecdcb389e1b0..f0522a4538d19 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1165,7 +1165,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent, { struct inode *ea_inode; struct ext4_xattr_entry *entry; - struct ext4_iloc iloc; + struct ext4_iloc iloc = { .bh = NULL }; bool dirty = false; unsigned int ea_ino; int err; @@ -1260,6 +1260,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent, ext4_warning_inode(parent, "handle dirty metadata err=%d", err); } + + brelse(iloc.bh); } /* From 134975be170b4772288319d4756b8edd04888cd3 Mon Sep 17 00:00:00 2001 From: Sanjaikumar V S Date: Wed, 11 Mar 2026 10:30:56 +0000 Subject: [PATCH 202/554] mtd: spi-nor: sst: Fix write enable before AAI sequence commit a0f64241d3566a49c0a9b33ba7ae458ae22003a9 upstream. When writing to SST flash starting at an odd address, a single byte is first programmed using the byte program (BP) command. After this operation completes, the flash hardware automatically clears the Write Enable Latch (WEL) bit. If an AAI (Auto Address Increment) word program sequence follows, it requires WEL to be set. Without re-enabling writes, the AAI sequence fails. Add spi_nor_write_enable() after the odd-address byte program when more data needs to be written. Use a local boolean for clarity. 
Fixes: b199489d37b2 ("mtd: spi-nor: add the framework for SPI NOR") Cc: stable@vger.kernel.org Signed-off-by: Sanjaikumar V S Tested-by: Hendrik Donner Reviewed-by: Hendrik Donner Signed-off-by: Pratyush Yadav (Google) Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/sst.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/mtd/spi-nor/sst.c b/drivers/mtd/spi-nor/sst.c index 175211fe6a5ed..db02c14ba16fa 100644 --- a/drivers/mtd/spi-nor/sst.c +++ b/drivers/mtd/spi-nor/sst.c @@ -203,6 +203,8 @@ static int sst_nor_write(struct mtd_info *mtd, loff_t to, size_t len, /* Start write from odd address. */ if (to % 2) { + bool needs_write_enable = (len > 1); + /* write one byte. */ ret = sst_nor_write_data(nor, to, 1, buf); if (ret < 0) @@ -210,6 +212,17 @@ static int sst_nor_write(struct mtd_info *mtd, loff_t to, size_t len, to++; actual++; + + /* + * Byte program clears the write enable latch. If more + * data needs to be written using the AAI sequence, + * re-enable writes. + */ + if (needs_write_enable) { + ret = spi_nor_write_enable(nor); + if (ret) + goto out; + } } /* Write out most of the data here. */ From ac858718219f9cd9807408196da21235cd303150 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Mar 2026 18:04:50 +0100 Subject: [PATCH 203/554] mtd: spinand: winbond: Declare the QE bit on W25NxxJW commit 7866ce992cf0d3c3b50fe8bf4acb1dbb173a2304 upstream. Factory default for this bit is "set" (at least on the chips I have), but we must make sure it is actually set by Linux explicitly, as the bit is writable by an earlier stage. 
Fixes: 6a804fb72de5 ("mtd: spinand: winbond: add support for serial NAND flash") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/spi/winbond.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 4870b2d5edb2a..af377b917a107 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -428,7 +428,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_dual_quad_dtr_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&w25n01jw_ooblayout, NULL), SPINAND_CONFIGURE_CHIP(w25n0xjw_hs_cfg)), SPINAND_INFO("W25N01KV", /* 3.3V */ @@ -488,7 +488,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_dual_quad_dtr_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL), SPINAND_CONFIGURE_CHIP(w25n0xjw_hs_cfg)), SPINAND_INFO("W25N02KV", /* 3.3V */ From f7d9d0e4a64fe42c4f25e2a4fbabe3887efbb490 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Mon, 9 Mar 2026 18:48:37 -0400 Subject: [PATCH 204/554] amdgpu/jpeg: fix deepsleep register for jpeg 5_0_0 and 5_0_2 commit e90dc3b2d73986610476b02c29d0074aa4d92fb0 upstream. PCTL0__MMHUB_DEEPSLEEP_IB is 0x69004 on MMHUB 4,1,0 and 0x60804 on MMHUB 4,2,0. 0x62a04 is on MMHUB 1,8,0/1. The DS bits are adjusted to cover more JPEG engines and MMHUB version. 
Signed-off-by: David (Ming Qiang) Wu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 52 +++++++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index aae7328973d18..47f2192fc7e7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -736,15 +736,35 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - if (!amdgpu_sriov_vf(ring->adev)) { + struct amdgpu_device *adev = ring->adev; + + if (!amdgpu_sriov_vf(adev)) { + int jpeg_inst = GET_INST(JPEG, ring->me); + uint32_t value = 0x80004000; /* default DS14 */ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + + /* PCTL0__MMHUB_DEEPSLEEP_IB could be different on different mmhub version */ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + amdgpu_ring_write(ring, 0x69004); + value = 0x80010000; + break; + case IP_VERSION(4, 2, 0): + amdgpu_ring_write(ring, 0x60804); + if (jpeg_inst & 1) + value = 0x80010000; + break; + default: + amdgpu_ring_write(ring, 0x62a04); + break; + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80004000); + amdgpu_ring_write(ring, value); } } @@ -757,15 +777,35 @@ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - if (!amdgpu_sriov_vf(ring->adev)) { + struct amdgpu_device *adev = ring->adev; + + if (!amdgpu_sriov_vf(adev)) { + int jpeg_inst = GET_INST(JPEG, ring->me); + uint32_t value = 0x00004000; /* default DS14 */ + 
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + + /* PCTL0__MMHUB_DEEPSLEEP_IB could be different on different mmhub version */ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + amdgpu_ring_write(ring, 0x69004); + value = 0x00010000; + break; + case IP_VERSION(4, 2, 0): + amdgpu_ring_write(ring, 0x60804); + if (jpeg_inst & 1) + value = 0x00010000; + break; + default: + amdgpu_ring_write(ring, 0x62a04); + break; + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00004000); + amdgpu_ring_write(ring, value); } } From 98623c7e2a51eab1833c8628d33fa9c6ef3ce325 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 23 Feb 2026 10:40:34 +0800 Subject: [PATCH 205/554] md/md-llbitmap: skip reading rdevs that are not in_sync commit 7701e68b5072faa03a8f30b4081dc16df9092381 upstream. When reading bitmap pages from member disks, the code iterates through all rdevs and attempts to read from the first available one. However, it only checks for raid_disk assignment and Faulty flag, missing the In_sync flag check. This can cause bitmap data to be read from spare disks that are still being rebuilt and don't have valid bitmap information yet. Reading stale or uninitialized bitmap data from such disks can lead to incorrect dirty bit tracking, potentially causing data corruption during recovery or normal operation. Add the In_sync flag check to ensure bitmap pages are only read from fully synchronized member disks that have valid bitmap data. 
Cc: stable@vger.kernel.org Fixes: 5ab829f1971d ("md/md-llbitmap: introduce new lockless bitmap") Link: https://lore.kernel.org/linux-raid/20260223024038.3084853-2-yukuai@fnnas.com Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-llbitmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c index bcb6eae1c711f..a8fd81c274027 100644 --- a/drivers/md/md-llbitmap.c +++ b/drivers/md/md-llbitmap.c @@ -459,7 +459,8 @@ static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx) rdev_for_each(rdev, mddev) { sector_t sector; - if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags)) + if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags) || + !test_bit(In_sync, &rdev->flags)) continue; sector = mddev->bitmap_info.offset + From 9142f00a9287ca38152717e3e88a033a27774e7f Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 23 Feb 2026 10:40:35 +0800 Subject: [PATCH 206/554] md/md-llbitmap: raise barrier before state machine transition commit ef4ca3d4bf09716cff9ba00eb0351deadc8417ab upstream. Move the barrier raise operation before calling llbitmap_state_machine() in both llbitmap_start_write() and llbitmap_start_discard(). This ensures the barrier is in place before any state transitions occur, preventing potential race conditions where the state machine could complete before the barrier is properly raised. 
Cc: stable@vger.kernel.org Fixes: 5ab829f1971d ("md/md-llbitmap: introduce new lockless bitmap") Link: https://lore.kernel.org/linux-raid/20260223024038.3084853-3-yukuai@fnnas.com Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-llbitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c index a8fd81c274027..0526e742062ac 100644 --- a/drivers/md/md-llbitmap.c +++ b/drivers/md/md-llbitmap.c @@ -1070,12 +1070,12 @@ static void llbitmap_start_write(struct mddev *mddev, sector_t offset, int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; - llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite); - while (page_start <= page_end) { llbitmap_raise_barrier(llbitmap, page_start); page_start++; } + + llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite); } static void llbitmap_end_write(struct mddev *mddev, sector_t offset, @@ -1102,12 +1102,12 @@ static void llbitmap_start_discard(struct mddev *mddev, sector_t offset, int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT; - llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard); - while (page_start <= page_end) { llbitmap_raise_barrier(llbitmap, page_start); page_start++; } + + llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard); } static void llbitmap_end_discard(struct mddev *mddev, sector_t offset, From 1985cb3247e87ff6b8ca4bc5f9626f4f51024507 Mon Sep 17 00:00:00 2001 From: Chia-Ming Chang Date: Thu, 2 Apr 2026 14:14:06 +0800 Subject: [PATCH 207/554] md/raid5: fix soft lockup in retry_aligned_read() commit 7f9f7c697474268d9ef9479df3ddfe7cdcfbbffc upstream. When retry_aligned_read() encounters an overlapped stripe, it releases the stripe via raid5_release_stripe() which puts it on the lockless released_stripes llist. 
In the next raid5d loop iteration, release_stripe_list() drains the stripe onto handle_list (since STRIPE_HANDLE is set by the original IO), but retry_aligned_read() runs before handle_active_stripes() and removes the stripe from handle_list via find_get_stripe() -> list_del_init(). This prevents handle_stripe() from ever processing the stripe to resolve the overlap, causing an infinite loop and soft lockup. Fix this by using __release_stripe() with temp_inactive_list instead of raid5_release_stripe() in the failure path, so the stripe does not go through the released_stripes llist. This allows raid5d to break out of its loop, and the overlap will be resolved when the stripe is eventually processed by handle_stripe(). Fixes: 773ca82fa1ee ("raid5: make release_stripe lockless") Cc: stable@vger.kernel.org Signed-off-by: FengWei Shih Signed-off-by: Chia-Ming Chang Link: https://lore.kernel.org/linux-raid/20260402061406.455755-1-chiamingc@synology.com/ Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3b711a1198adb..b9f0d01ce01cb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6635,7 +6635,13 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, } if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) { - raid5_release_stripe(sh); + int hash; + + spin_lock_irq(&conf->device_lock); + hash = sh->hash_lock_index; + __release_stripe(conf, sh, + &conf->temp_inactive_list[hash]); + spin_unlock_irq(&conf->device_lock); conf->retry_read_aligned = raid_bio; conf->retry_read_offset = scnt; return handled; From 73ce72edd113374801045924d4417199963f73a3 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Sat, 4 Apr 2026 15:44:35 +0800 Subject: [PATCH 208/554] md/raid5: validate payload size before accessing journal metadata commit b0cc3ae97e893bf54bbce447f4e9fd2e0b88bff9 upstream. 
r5c_recovery_analyze_meta_block() and r5l_recovery_verify_data_checksum_for_mb() iterate over payloads in a journal metadata block using on-disk payload size fields without validating them against the remaining space in the metadata block. A corrupted journal containing payload sizes extending beyond the PAGE_SIZE boundary can cause out-of-bounds reads when accessing payload fields or computing offsets. Add bounds validation for each payload type to ensure the full payload fits within meta_size before processing. Fixes: b4c625c67362 ("md/r5cache: r5cache recovery: part 1") Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Link: https://lore.kernel.org/linux-raid/SYBPR01MB78815E78D829BB86CD7C8015AF5FA@SYBPR01MB7881.ausprd01.prod.outlook.com/ Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5-cache.c | 48 +++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index ba768ca7f4229..2518781cbdf6d 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2002,15 +2002,27 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, return -ENOMEM; while (mb_offset < le32_to_cpu(mb->meta_size)) { + sector_t payload_len; + payload = (void *)mb + mb_offset; payload_flush = (void *)mb + mb_offset; if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) { + payload_len = sizeof(struct r5l_payload_data_parity) + + (sector_t)sizeof(__le32) * + (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); + if (mb_offset + payload_len > le32_to_cpu(mb->meta_size)) + goto mismatch; if (r5l_recovery_verify_data_checksum( log, ctx, page, log_offset, payload->checksum[0]) < 0) goto mismatch; } else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY) { + payload_len = sizeof(struct r5l_payload_data_parity) + + (sector_t)sizeof(__le32) * + (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); + if (mb_offset + payload_len > 
le32_to_cpu(mb->meta_size)) + goto mismatch; if (r5l_recovery_verify_data_checksum( log, ctx, page, log_offset, payload->checksum[0]) < 0) @@ -2023,22 +2035,18 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log, payload->checksum[1]) < 0) goto mismatch; } else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { - /* nothing to do for R5LOG_PAYLOAD_FLUSH here */ + payload_len = sizeof(struct r5l_payload_flush) + + (sector_t)le32_to_cpu(payload_flush->size); + if (mb_offset + payload_len > le32_to_cpu(mb->meta_size)) + goto mismatch; } else /* not R5LOG_PAYLOAD_DATA/PARITY/FLUSH */ goto mismatch; - if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { - mb_offset += sizeof(struct r5l_payload_flush) + - le32_to_cpu(payload_flush->size); - } else { - /* DATA or PARITY payload */ + if (le16_to_cpu(payload->header.type) != R5LOG_PAYLOAD_FLUSH) { log_offset = r5l_ring_add(log, log_offset, le32_to_cpu(payload->size)); - mb_offset += sizeof(struct r5l_payload_data_parity) + - sizeof(__le32) * - (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); } - + mb_offset += payload_len; } put_page(page); @@ -2089,6 +2097,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, log_offset = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS); while (mb_offset < le32_to_cpu(mb->meta_size)) { + sector_t payload_len; int dd; payload = (void *)mb + mb_offset; @@ -2097,6 +2106,12 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) { int i, count; + payload_len = sizeof(struct r5l_payload_flush) + + (sector_t)le32_to_cpu(payload_flush->size); + if (mb_offset + payload_len > + le32_to_cpu(mb->meta_size)) + return -EINVAL; + count = le32_to_cpu(payload_flush->size) / sizeof(__le64); for (i = 0; i < count; ++i) { stripe_sect = le64_to_cpu(payload_flush->flush_stripes[i]); @@ -2110,12 +2125,17 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, } } - mb_offset += sizeof(struct r5l_payload_flush) + - 
le32_to_cpu(payload_flush->size); + mb_offset += payload_len; continue; } /* DATA or PARITY payload */ + payload_len = sizeof(struct r5l_payload_data_parity) + + (sector_t)sizeof(__le32) * + (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); + if (mb_offset + payload_len > le32_to_cpu(mb->meta_size)) + return -EINVAL; + stripe_sect = (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) ? raid5_compute_sector( conf, le64_to_cpu(payload->location), 0, &dd, @@ -2180,9 +2200,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, log_offset = r5l_ring_add(log, log_offset, le32_to_cpu(payload->size)); - mb_offset += sizeof(struct r5l_payload_data_parity) + - sizeof(__le32) * - (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); + mb_offset += payload_len; } return 0; From ea43a34452a6a58507e42d7edfdfc4ae95744f9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Mar 2026 17:33:07 +0100 Subject: [PATCH 209/554] check-uapi: link into shared objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a261f6dff3c1653c19c065c3b3650c625447b8a7 upstream. While testing ABI changes across all architectures, I found that abidiff sometimes produces nonsensical output. Further debugging identified missing or broken libelf support for architecture specific relocations in ET_REL binaries as the source of the problem[1]. Change the script to no longer produce a relocatable object file but instead create a shared library for each header. This makes abidiff work for all of the architectures in upstream linux kernels. 
Link: https://sourceware.org/bugzilla/show_bug.cgi?id=33869 Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Thomas Weißschuh Acked-by: Nathan Chancellor Link: https://patch.msgid.link/20260306163309.2015837-2-arnd@kernel.org Signed-off-by: Nicolas Schier Signed-off-by: Greg Kroah-Hartman --- scripts/check-uapi.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/check-uapi.sh b/scripts/check-uapi.sh index 955581735cb3c..9fa45cbdecc25 100755 --- a/scripts/check-uapi.sh +++ b/scripts/check-uapi.sh @@ -178,8 +178,11 @@ do_compile() { local -r inc_dir="$1" local -r header="$2" local -r out="$3" - printf "int main(void) { return 0; }\n" | \ - "$CC" -c \ + printf "int f(void) { return 0; }\n" | \ + "$CC" \ + -shared \ + -nostdlib \ + -fPIC \ -o "$out" \ -x c \ -O0 \ From 371f4dc806017e68a616d236b0b9d7287e640b99 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Sat, 4 Apr 2026 15:14:34 +0530 Subject: [PATCH 210/554] HID: apple: ensure the keyboard backlight is off if suspending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f95a6cd5ad78ed27a31a20cbd1facff6f10b33d upstream. Some users reported that upon suspending their keyboard backlight remained on. Fix this by adding the missing LED_CORE_SUSPENDRESUME flag. 
Cc: stable@vger.kernel.org Fixes: 394ba612f941 ("HID: apple: Add support for magic keyboard backlight on T2 Macs") Fixes: 9018eacbe623 ("HID: apple: Add support for keyboard backlight on certain T2 Macs.") Reported-by: André Eikmeyer Tested-by: André Eikmeyer Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 9dcb252c5d6c7..cb1bd6dca8356 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -858,6 +858,7 @@ static int apple_backlight_init(struct hid_device *hdev) asc->backlight->cdev.name = "apple::kbd_backlight"; asc->backlight->cdev.max_brightness = rep->backlight_on_max; asc->backlight->cdev.brightness_set_blocking = apple_backlight_led_set; + asc->backlight->cdev.flags = LED_CORE_SUSPENDRESUME; ret = apple_backlight_set(hdev, 0, 0); if (ret < 0) { @@ -926,6 +927,7 @@ static int apple_magic_backlight_init(struct hid_device *hdev) backlight->cdev.name = ":white:" LED_FUNCTION_KBD_BACKLIGHT; backlight->cdev.max_brightness = backlight->brightness->field[0]->logical_maximum; backlight->cdev.brightness_set_blocking = apple_magic_backlight_led_set; + backlight->cdev.flags = LED_CORE_SUSPENDRESUME; apple_magic_backlight_set(backlight, 0, 0); From fdaa42ca370d056428e5e171247c8fdce8dff36a Mon Sep 17 00:00:00 2001 From: Chia-Ming Chang Date: Tue, 24 Feb 2026 17:34:42 +0800 Subject: [PATCH 211/554] inotify: fix watch count leak when fsnotify_add_inode_mark_locked() fails commit 6a320935fa4293e9e599ec9f85dc9eb3be7029f8 upstream. When fsnotify_add_inode_mark_locked() fails in inotify_new_watch(), the error path calls inotify_remove_from_idr() but does not call dec_inotify_watches() to undo the preceding inc_inotify_watches(). This leaks a watch count, and repeated failures can exhaust the max_user_watches limit with -ENOSPC even when no watches are active. 
Prior to commit 1cce1eea0aff ("inotify: Convert to using per-namespace limits"), the watch count was incremented after fsnotify_add_mark_locked() succeeded, so this path was not affected. The conversion moved inc_inotify_watches() before the mark insertion without adding the corresponding rollback. Add the missing dec_inotify_watches() call in the error path. Fixes: 1cce1eea0aff ("inotify: Convert to using per-namespace limits") Cc: stable@vger.kernel.org Signed-off-by: Chia-Ming Chang Signed-off-by: robbieko Reviewed-by: Nikolay Borisov Link: https://patch.msgid.link/20260224093442.3076294-1-chiamingc@synology.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/inotify/inotify_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index b372fb2c56bd1..0d813c52ff9c3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -621,6 +621,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); + dec_inotify_watches(group->inotify_data.ucounts); goto out_err; } From ef7ce8f4a341c57b0b760bbb52c89a480278c28c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 21 Apr 2026 09:31:36 -0700 Subject: [PATCH 212/554] x86/cpu: Disable FRED when PTI is forced on commit 932d922285ef4d0d655a6f5def2779ae86ca0d73 upstream. FRED and PTI were never intended to work together. No FRED hardware is vulnerable to Meltdown and all of it should have LASS anyway. Nevertheless, if you boot a system with pti=on and fred=on, the kernel tries to do what is asked of it and dies a horrible death on the first attempt to run userspace (since it never switches to the user page tables). Disable FRED when PTI is forced on, and print a warning about it. A quick brain dump about what a FRED+PTI implementation would look like is below. 
I'm not sure it would make any sense to do it, but never say never. All I know is that it's way too complicated to be worth it today. The SWITCH_TO_USER/KERNEL_CR3 bits are simple to fix (or at least we have the assembly tools to do it already), as is sticking the FRED entry text in .entry.text (it's not in there today). The nasty part is the stacks. Today, the CPU pops into the kernel on MSR_IA32_FRED_RSP0 which is normal old kernel memory and not mapped to userspace. The hardware pushes gunk on to MSR_IA32_FRED_RSP0, which is currently the task stacks. MSR_IA32_FRED_RSP0 would need to point elsewhere, probably cpu_entry_stack(). Then, start playing games with stacks on entry/exit, including copying gunk to and from the task stack. While I'd *like* to have PTI everywhere, I'm not sure it's worth mucking up the FRED code with PTI kludges. If a user wants fast entry/exit, they use FRED. If you want PTI (and sekuritay), you certainly don't care about fast entry and FRED isn't going to help you *all* that much, so you can just stay with the IDT. Plus, FRED hardware should have LASS which gives you a similar security profile to PTI without the CR3 munging. 
Reported-by: Gayatri Kammela Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Tested-by: Maciej Wieczor-Retman Cc:stable@vger.kernel.org Link: https://patch.msgid.link/20260421163136.E7C6788A@davehans-spike.ostc.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index b10d4d131dce3..af9176968fa2c 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -105,6 +105,11 @@ void __init pti_check_boottime_disable(void) pr_debug("PTI enabled, disabling INVLPGB\n"); setup_clear_cpu_cap(X86_FEATURE_INVLPGB); } + + if (cpu_feature_enabled(X86_FEATURE_FRED)) { + pr_debug("PTI enabled, disabling FRED\n"); + setup_clear_cpu_cap(X86_FEATURE_FRED); + } } static int __init pti_parse_cmdline(char *arg) From 4f3374c990fb2adec06d20fd6d780927811c9aa0 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 9 Apr 2026 11:43:30 -0700 Subject: [PATCH 213/554] x86/shstk: Prevent deadlock during shstk sigreturn commit 9874b2917b9fbc30956fee209d3c4aa47201c64e upstream. During sigreturn the shadow stack signal frame is popped. The kernel does this by reading the shadow stack using normal read accesses. When it can't assume the memory is shadow stack, it takes extra steps to makes sure it is reading actual shadow stack memory and not other normal readable memory. It does this by holding the mmap read lock while doing the access and checking the flags of the VMA. Unfortunately that is not safe. If the read of the shadow stack sigframe hits a page fault, the fault handler will try to recursively grab another mmap read lock. This normally works ok, but if a writer on another CPU is also waiting, the second read lock could fail and cause a deadlock. Fix this by not holding mmap lock during the read access to userspace. Instead use mmap_lock_speculate_...() to watch for changes between dropping mmap lock and the userspace access. 
Retry if anything grabbed an mmap write lock in between and could have changed the VMA. These mmap_lock_speculate_...() helpers use mm::mm_lock_seq, which is only available when PER_VMA_LOCK is configured. So make X86_USER_SHADOW_STACK depend on it. On x86, PER_VMA_LOCK is a default configuration for SMP kernels. So drop support for the other configs under the assumption that the !SMP shadow stack user base does not exist. Currently there is a check that skips the lookup work when the SSP can be assumed to be on a shadow stack. While reorganizing the function, remove the optimization to make the tricky code flows more common, such that issues like this cannot escape detection for so long. Fixes: 7fad2a432cd3 ("x86/shstk: Check that signal frame is shadow stack mem") Suggested-by: Linus Torvalds Signed-off-by: Rick Edgecombe Signed-off-by: Thomas Gleixner Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + arch/x86/kernel/shstk.c | 44 ++++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee41af778a9dc..8623c4a5ce7b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1877,6 +1877,7 @@ config X86_USER_SHADOW_STACK bool "X86 userspace shadow stack" depends on AS_WRUSS depends on X86_64 + depends on PER_VMA_LOCK select ARCH_USES_HIGH_VMA_FLAGS select ARCH_HAS_USER_SHADOW_STACK select X86_CET diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index ff8edea8511b4..63775693a3bb4 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -334,10 +334,8 @@ static int shstk_push_sigframe(unsigned long *ssp) static int shstk_pop_sigframe(unsigned long *ssp) { - struct vm_area_struct *vma; unsigned long token_addr; - bool need_to_check_vma; - int err = 1; + unsigned int seq; /* * It is possible for the SSP to be off the end of a shadow stack by 4 @@ -348,25 +346,35 @@ 
static int shstk_pop_sigframe(unsigned long *ssp) if (!IS_ALIGNED(*ssp, 8)) return -EINVAL; - need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp; + do { + struct vm_area_struct *vma; + bool valid_vma; + int err; - if (need_to_check_vma) if (mmap_read_lock_killable(current->mm)) return -EINTR; - err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); - if (unlikely(err)) - goto out_err; - - if (need_to_check_vma) { vma = find_vma(current->mm, *ssp); - if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) { - err = -EFAULT; - goto out_err; - } - + valid_vma = vma && (vma->vm_flags & VM_SHADOW_STACK); + + /* + * VMAs can change between get_shstk_data() and find_vma(). + * Watch for changes and ensure that 'token_addr' comes from + * 'vma' by recording a seqcount. + * + * Ignore the return value of mmap_lock_speculate_try_begin() + * because the mmap lock excludes the possibility of writers. + */ + mmap_lock_speculate_try_begin(current->mm, &seq); mmap_read_unlock(current->mm); - } + + if (!valid_vma) + return -EINVAL; + + err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); + if (err) + return err; + } while (mmap_lock_speculate_retry(current->mm, seq)); /* Restore SSP aligned? */ if (unlikely(!IS_ALIGNED(token_addr, 8))) @@ -379,10 +387,6 @@ static int shstk_pop_sigframe(unsigned long *ssp) *ssp = token_addr; return 0; -out_err: - if (need_to_check_vma) - mmap_read_unlock(current->mm); - return err; } int setup_signal_shadow_stack(struct ksignal *ksig) From 69c4d137b22d14a6fd7f8bd2f7689ff027eef41a Mon Sep 17 00:00:00 2001 From: Yi Cong Date: Fri, 6 Mar 2026 15:16:27 +0800 Subject: [PATCH 214/554] wifi: rtl8xxxu: fix potential use of uninitialized value commit f8a2fc809bfeb49130709b31a4d357a049f28547 upstream. The local variables 'mcs' and 'nss' in rtl8xxxu_update_ra_report() are passed to rtl8xxxu_desc_to_mcsrate() as output parameters. 
If the helper function encounters an unhandled rate index, it may return without setting these values, leading to the use of uninitialized stack data. Remove the helper rtl8xxxu_desc_to_mcsrate() and inline the logic into rtl8xxxu_update_ra_report(). This fixes the use of uninitialized 'mcs' and 'nss' variables for legacy rates. The new implementation explicitly handles: - Legacy rates: Set bitrate only. - HT rates (MCS0-15): Set MCS flags, index, and NSS (1 or 2) directly. - Invalid rates: Return early. Fixes: 7de16123d9e2 ("wifi: rtl8xxxu: Introduce rtl8xxxu_update_ra_report") Cc: stable@vger.kernel.org Suggested-by: Ping-Ke Shih Signed-off-by: Yi Cong Link: https://lore.kernel.org/all/96e31963da0c42dcb52ce44f818963d7@realtek.com/ Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306071627.56501-1-cong.yi@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtl8xxxu/core.c | 28 ++++++-------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index a17c1084931b0..25e5fd48b8e28 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -4821,20 +4821,6 @@ static const struct ieee80211_rate rtl8xxxu_legacy_ratetable[] = { {.bitrate = 540, .hw_value = 0x0b,}, }; -static void rtl8xxxu_desc_to_mcsrate(u16 rate, u8 *mcs, u8 *nss) -{ - if (rate <= DESC_RATE_54M) - return; - - if (rate >= DESC_RATE_MCS0 && rate <= DESC_RATE_MCS15) { - if (rate < DESC_RATE_MCS8) - *nss = 1; - else - *nss = 2; - *mcs = rate - DESC_RATE_MCS0; - } -} - static void rtl8xxxu_set_basic_rates(struct rtl8xxxu_priv *priv, u32 rate_cfg) { struct ieee80211_hw *hw = priv->hw; @@ -4944,23 +4930,25 @@ static void rtl8xxxu_set_aifs(struct rtl8xxxu_priv *priv, u8 slot_time) void rtl8xxxu_update_ra_report(struct rtl8xxxu_ra_report *rarpt, u8 rate, u8 sgi, u8 bw) { - u8 mcs, nss; - rarpt->txrate.flags = 0; if 
(rate <= DESC_RATE_54M) { rarpt->txrate.legacy = rtl8xxxu_legacy_ratetable[rate].bitrate; - } else { - rtl8xxxu_desc_to_mcsrate(rate, &mcs, &nss); + } else if (rate >= DESC_RATE_MCS0 && rate <= DESC_RATE_MCS15) { rarpt->txrate.flags |= RATE_INFO_FLAGS_MCS; + if (rate < DESC_RATE_MCS8) + rarpt->txrate.nss = 1; + else + rarpt->txrate.nss = 2; - rarpt->txrate.mcs = mcs; - rarpt->txrate.nss = nss; + rarpt->txrate.mcs = rate - DESC_RATE_MCS0; if (sgi) rarpt->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; rarpt->txrate.bw = bw; + } else { + return; } rarpt->bit_rate = cfg80211_calculate_bitrate(&rarpt->txrate); From 83bb57635d7cbafde32f865b577ecfd969f02337 Mon Sep 17 00:00:00 2001 From: Zhenzhong Wu Date: Wed, 22 Apr 2026 10:45:53 +0800 Subject: [PATCH 215/554] tcp: call sk_data_ready() after listener migration commit 3864c6ba1e041bc75342353a70fa2a2c6f909923 upstream. When inet_csk_listen_stop() migrates an established child socket from a closing listener to another socket in the same SO_REUSEPORT group, the target listener gets a new accept-queue entry via inet_csk_reqsk_queue_add(), but that path never notifies the target listener's waiters. A nonblocking accept() still works because it checks the queue directly, but poll()/epoll_wait() waiters and blocking accept() callers can also remain asleep indefinitely. Call READ_ONCE(nsk->sk_data_ready)(nsk) after a successful migration in inet_csk_listen_stop(). However, after inet_csk_reqsk_queue_add() succeeds, the ref acquired in reuseport_migrate_sock() is effectively transferred to nreq->rsk_listener. Another CPU can then dequeue nreq via accept() or listener shutdown, hit reqsk_put(), and drop that listener ref. Since listeners are SOCK_RCU_FREE, wrap the post-queue_add() dereferences of nsk in rcu_read_lock()/rcu_read_unlock(), which also covers the existing sock_net(nsk) access in that path. 
The reqsk_timer_handler() path does not need the same changes for two reasons: half-open requests become readable only after the final ACK, where tcp_child_process() already wakes the listener; and once nreq is visible via inet_ehash_insert(), the success path no longer touches nsk directly. Fixes: 54b92e841937 ("tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets in accept queues.") Cc: stable@vger.kernel.org Suggested-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Zhenzhong Wu Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260422024554.130346-2-jt26wzz@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7a2f116106e96..ed773cd488769 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1486,16 +1486,19 @@ void inet_csk_listen_stop(struct sock *sk) if (nreq) { refcount_set(&nreq->rsk_refcnt, 1); + rcu_read_lock(); if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { __NET_INC_STATS(sock_net(nsk), LINUX_MIB_TCPMIGRATEREQSUCCESS); reqsk_migrate_reset(req); + READ_ONCE(nsk->sk_data_ready)(nsk); } else { __NET_INC_STATS(sock_net(nsk), LINUX_MIB_TCPMIGRATEREQFAILURE); reqsk_migrate_reset(nreq); __reqsk_free(nreq); } + rcu_read_unlock(); /* inet_csk_reqsk_queue_add() has already * called inet_child_forget() on failure case. From e88827bb039904d323aa619d771ae4e54c5b1e2a Mon Sep 17 00:00:00 2001 From: Yiyang Chen Date: Mon, 30 Mar 2026 03:00:40 +0800 Subject: [PATCH 216/554] taskstats: set version in TGID exit notifications commit 16c4f0211aaa1ec1422b11b59f64f1abe9009fc0 upstream. delay accounting started populating taskstats records with a valid version field via fill_pid() and fill_tgid(). 
Later, commit ad4ecbcba728 ("[PATCH] delay accounting taskstats interface send tgid once") changed the TGID exit path to send the cached signal->stats aggregate directly instead of building the outgoing record through fill_tgid(). Unlike fill_tgid(), fill_tgid_exit() only accumulates accounting data and never initializes stats->version. As a result, TGID exit notifications can reach userspace with version == 0 even though PID exit notifications and TASKSTATS_CMD_GET replies carry a valid taskstats version. This is easy to reproduce with `tools/accounting/getdelays.c`. I have a small follow-up patch for that tool which: 1. increases the receive buffer/message size so the pid+tgid combined exit notification is not dropped/truncated 2. prints `stats->version`. With that patch, the reproducer is: Terminal 1: ./getdelays -d -v -l -m 0 Terminal 2: taskset -c 0 python3 -c 'import threading,time; t=threading.Thread(target=time.sleep,args=(0.1,)); t.start(); t.join()' That produces both PID and TGID exit notifications for the same process. The PID exit record reports a valid taskstats version, while the TGID exit record reports `version 0`. This patch (of 2): Set stats->version = TASKSTATS_VERSION after copying the cached TGID aggregate into the outgoing netlink payload so all taskstats records are self-describing again. Link: https://lkml.kernel.org/r/ba83d934e59edd431b693607de573eb9ca059309.1774810498.git.cyyzero16@gmail.com Fixes: ad4ecbcba728 ("[PATCH] delay accounting taskstats interface send tgid once") Signed-off-by: Yiyang Chen Cc: Balbir Singh Cc: Dr. 
Thomas Orgis Cc: Fan Yu Cc: Wang Yaxin Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/taskstats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 0cd680ccc7e53..73bd6a6a78935 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -649,6 +649,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) goto err; memcpy(stats, tsk->signal->stats, sizeof(*stats)); + stats->version = TASKSTATS_VERSION; send: send_cpu_listeners(rep_skb, listeners); From b7a953374db91a911959e17e5020edbeab9ba8a1 Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Wed, 25 Mar 2026 15:30:24 -0700 Subject: [PATCH 217/554] mfd: core: Preserve OF node when ACPI handle is present commit caa5a5d44d8ae4fd13b744857d66c9313b712d1f upstream. Switch device_set_node to set_primary_fwnode, so that the ACPI fwnode does not overwrite the of_node with NULL. This allows MFD children with both OF nodes and ACPI handles to have OF nodes again. Cc: stable@vger.kernel.org Fixes: 51e3b257099d ("mfd: core: Make use of device_set_node()") Signed-off-by: Brian Mak Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20260325223024.35992-1-makb@juniper.net Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/mfd-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index c55223ce4327a..256058a6e247e 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -88,7 +88,17 @@ static void mfd_acpi_add_device(const struct mfd_cell *cell, } } - device_set_node(&pdev->dev, acpi_fwnode_handle(adev ?: parent)); + /* + * NOTE: The fwnode design doesn't allow proper stacking/sharing. This + * should eventually turn into a device fwnode API call that will allow + * prepending to a list of fwnodes (with ACPI taking precedence). 
+ * + * set_primary_fwnode() is used here, instead of device_set_node(), as + * device_set_node() will overwrite the existing fwnode, which may be an + * OF node that was populated earlier. To support a use case where ACPI + * and OF is used in conjunction, we call set_primary_fwnode() instead. + */ + set_primary_fwnode(&pdev->dev, acpi_fwnode_handle(adev ?: parent)); } #else static inline void mfd_acpi_add_device(const struct mfd_cell *cell, From b4339521bb4309dcf0468ce2aaf565e8cf43d295 Mon Sep 17 00:00:00 2001 From: Cengiz Can Date: Tue, 10 Feb 2026 11:17:14 +0300 Subject: [PATCH 218/554] apparmor: use target task's context in apparmor_getprocattr() commit 4afc61702bdcc3b9b519749ef966cf762a6e7051 upstream. apparmor_getprocattr() incorrectly calls task_ctx(current) instead of task_ctx(task) when retrieving prev and exec attributes, returning the caller's labels rather than the target's. Fix by passing task to task_ctx(). The issue can be reproduced when a process with an onexec transition (e.g., configured by a container runtime) is inspected via /proc//attr/apparmor/exec. The reader's own value is returned instead of the target's. 
Reported-by: Qualys Security Advisory Fixes: 3b529a7600d8 ("apparmor: move task domain change info to task security") Cc: stable@vger.kernel.org Co-developed-by: Cengiz Can Signed-off-by: Cengiz Can Co-developed-by: John Johansen Signed-off-by: John Johansen Signed-off-by: Greg Kroah-Hartman --- security/apparmor/lsm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 13c9bfdf65ff5..3a4ef7bd3b5d0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -821,25 +821,23 @@ static int apparmor_getprocattr(struct task_struct *task, const char *name, char **value) { int error = -ENOENT; - /* released below */ - const struct cred *cred = get_task_cred(task); - struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; + rcu_read_lock(); if (strcmp(name, "current") == 0) - label = aa_get_newest_label(cred_label(cred)); - else if (strcmp(name, "prev") == 0 && ctx->previous) - label = aa_get_newest_label(ctx->previous); - else if (strcmp(name, "exec") == 0 && ctx->onexec) - label = aa_get_newest_label(ctx->onexec); + label = aa_get_newest_cred_label(__task_cred(task)); + else if (strcmp(name, "prev") == 0 && task_ctx(task)->previous) + label = aa_get_newest_label(task_ctx(task)->previous); + else if (strcmp(name, "exec") == 0 && task_ctx(task)->onexec) + label = aa_get_newest_label(task_ctx(task)->onexec); else error = -EINVAL; + rcu_read_unlock(); if (label) error = aa_getprocattr(label, value, true); aa_put_label(label); - put_cred(cred); return error; } From e08d75753db17aa943d7622f09d9c217b5bfd3b8 Mon Sep 17 00:00:00 2001 From: Shuvam Pandey Date: Thu, 9 Apr 2026 00:32:30 +0545 Subject: [PATCH 219/554] Bluetooth: hci_event: fix potential UAF in SSP passkey handlers commit 85fa3512048793076eef658f66489112dcc91993 upstream. 
hci_conn lookup and field access must be covered by hdev lock in hci_user_passkey_notify_evt() and hci_keypress_notify_evt(), otherwise the connection can be freed concurrently. Extend the hci_dev_lock critical section to cover all conn usage in both handlers. Keep the existing keypress notification behavior unchanged by routing the early exits through a common unlock path. Fixes: 92a25256f142 ("Bluetooth: mgmt: Implement support for passkey notification") Cc: stable@vger.kernel.org Signed-off-by: Shuvam Pandey Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7794f4f981596..c66443f4cf5a7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5422,9 +5422,11 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; conn->passkey_notify = __le32_to_cpu(ev->passkey); conn->passkey_entered = 0; @@ -5433,6 +5435,9 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, @@ -5443,14 +5448,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; switch (ev->type) { case HCI_KEYPRESS_STARTED: conn->passkey_entered = 0; - return; + goto unlock; case HCI_KEYPRESS_ENTERED: conn->passkey_entered++; @@ -5465,13 +5472,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, break; case 
HCI_KEYPRESS_COMPLETED: - return; + goto unlock; } if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, From 1656698656f61957355998d7eb5e60f00769d33a Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 3 Mar 2026 01:02:13 -0800 Subject: [PATCH 220/554] bus: mhi: host: pci_generic: Switch to async power up to avoid boot delays commit cfdb41adf1c2822ad1b1791d4d11093edb5582b6 upstream. Some modem devices can take significant time (up to 20 secs for sdx75) to enter mission mode during initialization. Currently, mhi_sync_power_up() waits for this entire process to complete, blocking other driver probes and delaying system boot. Switch to mhi_async_power_up() so probe can return immediately while MHI initialization continues in the background. This eliminates lengthy boot delays and allows other drivers to probe in parallel, improving overall system boot performance. 
Fixes: 5571519009d0 ("bus: mhi: host: pci_generic: Add SDX75 based modem support") Signed-off-by: Qiang Yu Signed-off-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260303-b4-async_power_on-v2-1-d3db81eb457d@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/pci_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index 21f0522d9ff29..b929c7d6c49aa 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1380,7 +1380,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_unregister; } - err = mhi_sync_power_up(mhi_cntrl); + err = mhi_async_power_up(mhi_cntrl); if (err) { dev_err(&pdev->dev, "failed to power up MHI controller\n"); goto err_unprepare; From c524c124e3094d2de12235a513854c03d06a2b58 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 11:45:20 +0100 Subject: [PATCH 221/554] can: ucan: fix devres lifetime commit fed4626501c871890da287bec62a96e52da1af89 upstream. USB drivers bind to USB interfaces and any device managed resources should have their lifetime tied to the interface rather than parent USB device. This avoids issues like memory leaks when drivers are unbound without their devices being physically disconnected (e.g. on probe deferral or configuration changes). Fix the control message buffer lifetime so that it is released on driver unbind. 
Fixes: 9f2d3eae88d2 ("can: ucan: add driver for Theobroma Systems UCAN devices") Cc: stable@vger.kernel.org # 4.19 Cc: Jakob Unterwurzacher Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260327104520.1310158-1-johan@kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/ucan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 6c90b4a7d955a..c3ebb648d8b0d 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1399,7 +1399,7 @@ static int ucan_probe(struct usb_interface *intf, */ /* Prepare Memory for control transfers */ - ctl_msg_buffer = devm_kzalloc(&udev->dev, + ctl_msg_buffer = devm_kzalloc(&intf->dev, sizeof(union ucan_ctl_payload), GFP_KERNEL); if (!ctl_msg_buffer) { From 343a5bf68a8ff9affcf2b70677ea4cf40c195ee4 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 16 Apr 2026 18:07:00 +0100 Subject: [PATCH 222/554] crypto: acomp - fix wrong pointer stored by acomp_save_req() commit d7e20b9bd6c990773cf0c09e2642250b8a70263d upstream. acomp_save_req() stores &req->chain in req->base.data. When acomp_reqchain_done() is invoked on asynchronous completion, it receives &req->chain as the data argument but casts it directly to struct acomp_req. Since data points to the chain member, all subsequent field accesses are at a wrong offset, resulting in memory corruption. The issue occurs when an asynchronous hardware implementation, such as the QAT driver, completes a request that uses the DMA virtual address interface (e.g. acomp_request_set_src_dma()). This combination causes crypto_acomp_compress() to enter the acomp_do_req_chain() path, which sets acomp_reqchain_done() as the completion callback via acomp_save_req(). 
With KASAN enabled, this manifests as a general protection fault in acomp_reqchain_done(): general protection fault, probably for non-canonical address 0xe000040000000000 KASAN: probably user-memory-access in range [0x0000400000000000-0x0000400000000007] RIP: 0010:acomp_reqchain_done+0x15b/0x4e0 Call Trace: qat_comp_alg_callback+0x5d/0xa0 [intel_qat] adf_ring_response_handler+0x376/0x8b0 [intel_qat] adf_response_handler+0x60/0x170 [intel_qat] tasklet_action_common+0x223/0x820 handle_softirqs+0x1ab/0x640 Fix this by storing the request itself in req->base.data instead of &req->chain, so that acomp_reqchain_done() receives the correct pointer. Simplify acomp_restore_req() accordingly to access req->chain directly. Fixes: 64929fe8c0a4 ("crypto: acomp - Remove request chaining") Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/acompress.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crypto/acompress.c b/crypto/acompress.c index be28cbfd22e32..25af7697d6bb9 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -171,15 +171,13 @@ static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt) state->compl = req->base.complete; state->data = req->base.data; req->base.complete = cplt; - req->base.data = state; + req->base.data = req; } static void acomp_restore_req(struct acomp_req *req) { - struct acomp_req_chain *state = req->base.data; - - req->base.complete = state->compl; - req->base.data = state->data; + req->base.complete = req->chain.compl; + req->base.data = req->chain.data; } static void acomp_reqchain_virt(struct acomp_req *req) From 4d713333dd32110245f8c54e101b0b9589fd2d28 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 18 Feb 2026 13:34:49 -0800 Subject: [PATCH 223/554] crypto: arm64/aes - Fix 32-bit aes_mac_update() arg treated as 64-bit commit f8f08d7cc43237e91e3aedf7b67d015d24c38fcc upstream. 
Since the 'enc_after' argument to neon_aes_mac_update() and ce_aes_mac_update() has type 'int', it needs to be accessed using the corresponding 32-bit register, not the 64-bit register. The upper half of the corresponding 64-bit register may contain garbage. Fixes: 4860620da7e5 ("crypto: arm64/aes - add NEON/Crypto Extensions CBCMAC/CMAC/XCBC driver") Cc: stable@vger.kernel.org Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20260218213501.136844-4-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-modes.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 0e834a2c062cf..e793478f37c1e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -838,7 +838,7 @@ AES_FUNC_START(aes_mac_update) encrypt_block v0, w2, x1, x7, w8 eor v0.16b, v0.16b, v4.16b cmp w3, wzr - csinv x5, x6, xzr, eq + csinv w5, w6, wzr, eq cbz w5, .Lmacout encrypt_block v0, w2, x1, x7, w8 st1 {v0.16b}, [x4] /* return dg */ @@ -852,7 +852,7 @@ AES_FUNC_START(aes_mac_update) eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ subs w3, w3, #1 - csinv x5, x6, xzr, eq + csinv w5, w6, wzr, eq cbz w5, .Lmacout .Lmacenc: From 61516b4a5b2647dc3f8f67b5dffaf038be997511 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 11 Mar 2026 03:07:35 +0100 Subject: [PATCH 224/554] crypto: atmel-aes - Fix 3-page memory leak in atmel_aes_buff_cleanup commit 3fcfff4ed35f963380a68741bcd52742baff7f76 upstream. atmel_aes_buff_init() allocates 4 pages using __get_free_pages() with ATMEL_AES_BUFFER_ORDER, but atmel_aes_buff_cleanup() frees only the first page using free_page(), leaking the remaining 3 pages. Use free_pages() with ATMEL_AES_BUFFER_ORDER to fix the memory leak. 
Fixes: bbe628ed897d ("crypto: atmel-aes - improve performances of data transfer") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-aes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 3a2684208dda9..5a6d64be81858 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -2131,7 +2131,7 @@ static int atmel_aes_buff_init(struct atmel_aes_dev *dd) static void atmel_aes_buff_cleanup(struct atmel_aes_dev *dd) { - free_page((unsigned long)dd->buf); + free_pages((unsigned long)dd->buf, ATMEL_AES_BUFFER_ORDER); } static int atmel_aes_dma_init(struct atmel_aes_dev *dd) From 9c032781c2b1fbe49ba941ddb208b030035f5902 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Feb 2026 15:03:13 +0100 Subject: [PATCH 225/554] crypto: atmel-ecc - Release client on allocation failure commit 095d50008d55d13f8fcf1bbeb7c6eba51779bc85 upstream. Call atmel_ecc_i2c_client_free() to release the I2C client reserved by atmel_ecc_i2c_client_alloc() when crypto_alloc_kpp() fails. Otherwise ->tfm_count will be out of sync. 
Fixes: 11105693fa05 ("crypto: atmel-ecc - introduce Microchip / Atmel ECC driver") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-ecc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 0d48e64d28b11..9da5a03880807 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -261,6 +261,7 @@ static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm) if (IS_ERR(fallback)) { dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n", alg, PTR_ERR(fallback)); + atmel_ecc_i2c_client_free(ctx->client); return PTR_ERR(fallback); } From 808685bfd124d9666b49ac53e735744bc78123b6 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 30 Mar 2026 17:19:32 +0200 Subject: [PATCH 226/554] crypto: hisilicon - Fix dma_unmap_single() direction commit 1ee57ab93b75eb59f426aef37b5498a7ffc28278 upstream. The direction used to map the buffer skreq->iv is DMA_TO_DEVICE but it is unmapped with direction DMA_BIDIRECTIONAL in the error path. Change the unmap to match the mapping. 
Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") Cc: Signed-off-by: Thomas Fourier Reviewed-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/hisilicon/sec/sec_algs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 1189effcdad07..512190b31b999 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -844,7 +844,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, if (crypto_skcipher_ivsize(atfm)) dma_unmap_single(info->dev, sec_req->dma_iv, crypto_skcipher_ivsize(atfm), - DMA_BIDIRECTIONAL); + DMA_TO_DEVICE); err_unmap_out_sg: if (split) sec_unmap_sg_on_err(skreq->dst, steps, splits_out, From 910f335786a0a0f0b46c3c8c19a13d25cb4454b6 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Mon, 30 Mar 2026 11:34:02 +0800 Subject: [PATCH 227/554] crypto: ccree - fix a memory leak in cc_mac_digest() commit 02c64052fad03699b9c6d1df2f9b444d17e4ac50 upstream. Add cc_unmap_result() if cc_map_hash_request_final() fails to prevent potential memory leak. 
Fixes: 63893811b0fc ("crypto: ccree - add ahash support") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_hash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index c6d085c8ff797..73179bf725a71 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -1448,6 +1448,7 @@ static int cc_mac_digest(struct ahash_request *req) if (cc_map_hash_request_final(ctx->drvdata, state, req->src, req->nbytes, 1, flags)) { dev_err(dev, "map_ahash_request_final() failed\n"); + cc_unmap_result(dev, state, digestsize, req->result); cc_unmap_req(dev, state, ctx); return -ENOMEM; } From 863d11b3927703ad95077c81a8a6489c5c7872f7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 7 Mar 2026 16:31:10 +0100 Subject: [PATCH 228/554] crypto: atmel-tdes - fix DMA sync direction commit c8a9a647532f5c2a04180352693215e24e9dba03 upstream. Before DMA output is consumed by the CPU, ->dma_addr_out must be synced with dma_sync_single_for_cpu() instead of dma_sync_single_for_device(). Using the wrong direction can return stale cache data on non-coherent platforms. 
Fixes: 13802005d8f2 ("crypto: atmel - add Atmel DES/TDES driver") Fixes: 1f858040c2f7 ("crypto: atmel-tdes - add support for latest release of the IP (0x700)") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-tdes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 3b2a92029b16f..d3bd8b72c2c95 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -294,8 +294,8 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); } else { - dma_sync_single_for_device(dd->dev, dd->dma_addr_out, - dd->dma_size, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, + dd->dma_size, DMA_FROM_DEVICE); /* copy data */ count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset, @@ -619,8 +619,8 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); } else { - dma_sync_single_for_device(dd->dev, dd->dma_addr_out, - dd->dma_size, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, + dd->dma_size, DMA_FROM_DEVICE); /* copy data */ count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset, From 64ea9dbf4c779e2af2c47844d63280f8e1fe52e3 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 15 Feb 2026 21:51:53 +0100 Subject: [PATCH 229/554] crypto: atmel-sha204a - Fix error codes in OTP reads commit 094c276da6a0d4971c3faae09a36b51d096659b2 upstream. Return -EINVAL from atmel_i2c_init_read_otp_cmd() on invalid addresses instead of -1. Since the OTP zone is accessed in 4-byte blocks, valid addresses range from 0 to OTP_ZONE_SIZE / 4 - 1. Fix the bounds check accordingly. 
In atmel_sha204a_otp_read(), propagate the actual error code from atmel_i2c_init_read_otp_cmd() instead of -1. Also, return -EIO instead of -EINVAL when the device is not ready. Cc: stable@vger.kernel.org Fixes: e05ce444e9e5 ("crypto: atmel-sha204a - add reading from otp zone") Signed-off-by: Thorsten Blum Reviewed-by: Lothar Rubusch Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-i2c.c | 4 ++-- drivers/crypto/atmel-sha204a.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index a895e4289efa0..a85dfdc6b3606 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -72,8 +72,8 @@ EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd); int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr) { - if (addr < 0 || addr > OTP_ZONE_SIZE) - return -1; + if (addr >= OTP_ZONE_SIZE / 4) + return -EINVAL; cmd->word_addr = COMMAND; cmd->opcode = OPCODE_READ; diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 8bcb9f3371f01..8f728cd7d740e 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -96,9 +96,10 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) { struct atmel_i2c_cmd cmd; - int ret = -1; + int ret; - if (atmel_i2c_init_read_otp_cmd(&cmd, addr) < 0) { + ret = atmel_i2c_init_read_otp_cmd(&cmd, addr); + if (ret < 0) { dev_err(&client->dev, "failed, invalid otp address %04X\n", addr); return ret; @@ -108,7 +109,7 @@ static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) if (cmd.data[0] == 0xff) { dev_err(&client->dev, "failed, device not ready\n"); - return -EINVAL; + return -EIO; } memcpy(otp, cmd.data+1, 4); From 1193c12126d39bf986a5a9214827b73707b193ab Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 14 Mar 2026 20:36:29 +0100 
Subject: [PATCH 230/554] crypto: atmel-sha204a - Fix potential UAF and memory leak in remove path commit bab1adf3b87e4bfac92c4f5963c63db434d561c1 upstream. Unregister the hwrng to prevent new ->read() calls and flush the Atmel I2C workqueue before teardown to prevent a potential UAF if a queued callback runs while the device is being removed. Drop the early return to ensure sysfs entries are removed and ->hwrng.priv is freed, preventing a memory leak. Fixes: da001fb651b0 ("crypto: atmel-i2c - add support for SHA204A random number generator") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-sha204a.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 8f728cd7d740e..3fece04852908 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -194,10 +194,8 @@ static void atmel_sha204a_remove(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); - if (atomic_read(&i2c_priv->tfm_count)) { - dev_emerg(&client->dev, "Device is busy, will remove it anyhow\n"); - return; - } + devm_hwrng_unregister(&client->dev, &i2c_priv->hwrng); + atmel_i2c_flush_queue(); sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups); From 2ae365e32998409acef0a02f2905b19184c9856d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Feb 2026 14:31:36 +0100 Subject: [PATCH 231/554] crypto: atmel-sha204a - Fix uninitialized data access on OTP read error commit de4e66b763d1e81188cb2803ec109466582fc9d1 upstream. Return early if atmel_i2c_send_receive() fails to avoid checking potentially uninitialized data in 'cmd.data'. 
Cc: stable@vger.kernel.org Fixes: e05ce444e9e5 ("crypto: atmel-sha204a - add reading from otp zone") Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-sha204a.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 3fece04852908..ae04b27a9ad11 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -106,6 +106,10 @@ static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) } ret = atmel_i2c_send_receive(client, &cmd); + if (ret < 0) { + dev_err(&client->dev, "failed to read otp at %04X\n", addr); + return ret; + } if (cmd.data[0] == 0xff) { dev_err(&client->dev, "failed, device not ready\n"); From 5c07962fed66e1238fad7635fa150570bd38b4c5 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 11 Mar 2026 16:56:47 +0100 Subject: [PATCH 232/554] crypto: nx - fix bounce buffer leaks in nx842_crypto_{alloc,free}_ctx commit adb3faf2db1a66d0f015b44ac909a32dfc7f2f9c upstream. The bounce buffers are allocated with __get_free_pages() using BOUNCE_BUFFER_ORDER (order 2 = 4 pages), but both the allocation error path and nx842_crypto_free_ctx() release the buffers with free_page(). Use free_pages() with the matching order instead. 
Fixes: ed70b479c2c0 ("crypto: nx - add hardware 842 crypto comp alg") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-842.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index b950fcce8a9be..1f5005fcd69a9 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -116,8 +116,8 @@ void *nx842_crypto_alloc_ctx(struct nx842_driver *driver) ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { kfree(ctx->wmem); - free_page((unsigned long)ctx->sbounce); - free_page((unsigned long)ctx->dbounce); + free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER); + free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER); kfree(ctx); return ERR_PTR(-ENOMEM); } @@ -131,8 +131,8 @@ void nx842_crypto_free_ctx(void *p) struct nx842_crypto_ctx *ctx = p; kfree(ctx->wmem); - free_page((unsigned long)ctx->sbounce); - free_page((unsigned long)ctx->dbounce); + free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER); + free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER); } EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx); From 354a27e28447d919058cac2ed38e0d2ce78ba6a1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 11 Mar 2026 16:56:49 +0100 Subject: [PATCH 233/554] crypto: nx - fix context leak in nx842_crypto_free_ctx commit 344e6a4f7ff4756b9b3f75e0eb7eaec297e35540 upstream. Since the scomp conversion, nx842_crypto_alloc_ctx() allocates the context separately, but nx842_crypto_free_ctx() never releases it. Add the missing kfree(ctx) to nx842_crypto_free_ctx(), and reuse nx842_crypto_free_ctx() in the allocation error path. 
Fixes: 980b5705f4e7 ("crypto: nx - Migrate to scomp API") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-842.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 1f5005fcd69a9..8b1dc05df8c8d 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -115,10 +115,7 @@ void *nx842_crypto_alloc_ctx(struct nx842_driver *driver) ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { - kfree(ctx->wmem); - free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER); - free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER); - kfree(ctx); + nx842_crypto_free_ctx(ctx); return ERR_PTR(-ENOMEM); } @@ -133,6 +130,7 @@ void nx842_crypto_free_ctx(void *p) kfree(ctx->wmem); free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER); free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER); + kfree(ctx); } EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx); From a616126cb0806c1f4f65302dcc189e2d0fcace3c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 17 Mar 2026 17:40:02 -0600 Subject: [PATCH 234/554] crypto: nx - Fix packed layout in struct nx842_crypto_header commit b0bfa49c03e3c65737eafa73d8a698eaf55379a6 upstream. struct nx842_crypto_header is declared with the __packed attribute, however the fields grouped with struct_group_tagged() were not packed. This caused the grouped header portion of the structure to lose the packed layout guarantees of the containing structure. Fix this by replacing struct_group_tagged() with __struct_group(..., ..., __packed, ...) so the grouped fields are packed, and the original layout is preserved, restoring the intended packed layout of the structure. 
Before changes: struct nx842_crypto_header { union { struct { __be16 magic; /* 0 2 */ __be16 ignore; /* 2 2 */ u8 groups; /* 4 1 */ }; /* 0 6 */ struct nx842_crypto_header_hdr hdr; /* 0 6 */ }; /* 0 6 */ struct nx842_crypto_header_group group[]; /* 6 0 */ /* size: 6, cachelines: 1, members: 2 */ /* last cacheline: 6 bytes */ } __attribute__((__packed__)); After changes: struct nx842_crypto_header { union { struct { __be16 magic; /* 0 2 */ __be16 ignore; /* 2 2 */ u8 groups; /* 4 1 */ } __attribute__((__packed__)); /* 0 5 */ struct nx842_crypto_header_hdr hdr; /* 0 5 */ }; /* 0 5 */ struct nx842_crypto_header_group group[]; /* 5 0 */ /* size: 5, cachelines: 1, members: 2 */ /* last cacheline: 5 bytes */ } __attribute__((__packed__)); Fixes: 1e6b251ce175 ("crypto: nx - Avoid -Wflex-array-member-not-at-end warning") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-842.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index f5e2c82ba8768..cd3c1a433e8cb 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -159,7 +159,7 @@ struct nx842_crypto_header_group { struct nx842_crypto_header { /* New members MUST be added within the struct_group() macro below. 
*/ - struct_group_tagged(nx842_crypto_header_hdr, hdr, + __struct_group(nx842_crypto_header_hdr, hdr, __packed, __be16 magic; /* NX842_CRYPTO_MAGIC */ __be16 ignore; /* decompressed end bytes to ignore */ u8 groups; /* total groups in this header */ @@ -167,7 +167,7 @@ struct nx842_crypto_header { struct nx842_crypto_header_group group[]; } __packed; static_assert(offsetof(struct nx842_crypto_header, group) == sizeof(struct nx842_crypto_header_hdr), - "struct member likely outside of struct_group_tagged()"); + "struct member likely outside of __struct_group()"); #define NX842_CRYPTO_GROUP_MAX (0x20) From 87c99a50e0fdc68a5b9b52a94d49452cd3ff02ca Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Sun, 1 Mar 2026 21:10:58 +0800 Subject: [PATCH 235/554] dm mirror: fix integer overflow in create_dirty_log() commit 4c788c6f921b22f9b6c3f316c4a071c05683e7de upstream. The argument count calculation in create_dirty_log() performs `*args_used = 2 + param_count` before validating against argc. When a user provides a param_count close to UINT_MAX via the device mapper table string, this unsigned addition wraps around to a small value, causing the subsequent `argc < *args_used` check to be bypassed. The overflowed param_count is then passed as argc to dm_dirty_log_create(), where it can cause out-of-bounds reads on the argv array. Fix by comparing param_count against argc - 2 before performing the addition, following the same pattern used by parse_features() in the same file. Since argc >= 2 is already guaranteed, the subtraction is safe. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reported-by: Yuhao Jiang Signed-off-by: Junrui Luo Reviewed-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid1.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 268f734ca9c38..bc8e04f6832a6 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -993,13 +993,13 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, return NULL; } - *args_used = 2 + param_count; - - if (argc < *args_used) { + if (param_count > argc - 2) { ti->error = "Insufficient mirror log arguments"; return NULL; } + *args_used = 2 + param_count; + dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count, argv + 2); if (!dl) { From b91e535f208c48a5e7464f1aa38338a30e7912df Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 27 Mar 2026 17:23:08 +0100 Subject: [PATCH 236/554] ceph: only d_add() negative dentries when they are unhashed commit 803447f93d75ab6e40c85e6d12b5630d281d70d6 upstream. Ceph can call d_add(dentry, NULL) on a negative dentry that is already present in the primary dcache hash. In the current VFS that is not safe. d_add() goes through __d_add() to __d_rehash(), which unconditionally reinserts dentry->d_hash into the hlist_bl bucket. If the dentry is already hashed, reinserting the same node can corrupt the bucket, including creating a self-loop. Once that happens, __d_lookup() can spin forever in the hlist_bl walk, typically looping only on the d_name.hash mismatch check and eventually triggering RCU stall reports like this one: rcu: INFO: rcu_sched self-detected stall on CPU rcu: 87-....: (2100 ticks this GP) idle=3a4c/1/0x4000000000000000 softirq=25003319/25003319 fqs=829 rcu: (t=2101 jiffies g=79058445 q=698988 ncpus=192) CPU: 87 UID: 2952868916 PID: 3933303 Comm: php-cgi8.3 Not tainted 6.18.17-i1-amd #950 NONE Hardware name: Dell Inc. 
PowerEdge R7615/0G9DHV, BIOS 1.6.6 09/22/2023 RIP: 0010:__d_lookup+0x46/0xb0 Code: c1 e8 07 48 8d 04 c2 48 8b 00 49 89 fc 49 89 f5 48 89 c3 48 83 e3 fe 48 83 f8 01 77 0f eb 2d 0f 1f 44 00 00 48 8b 1b 48 85 db <74> 20 39 6b 18 75 f3 48 8d 7b 78 e8 ba 85 d0 00 4c 39 63 10 74 1f RSP: 0018:ff745a70c8253898 EFLAGS: 00000282 RAX: ff26e470054cb208 RBX: ff26e470054cb208 RCX: 000000006e958966 RDX: ff26e48267340000 RSI: ff745a70c82539b0 RDI: ff26e458f74655c0 RBP: 000000006e958966 R08: 0000000000000180 R09: 9cd08d909b919a89 R10: ff26e458f74655c0 R11: 0000000000000000 R12: ff26e458f74655c0 R13: ff745a70c82539b0 R14: d0d0d0d0d0d0d0d0 R15: 2f2f2f2f2f2f2f2f FS: 00007f5770896980(0000) GS:ff26e482c5d88000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f5764de50c0 CR3: 000000a72abb5001 CR4: 0000000000771ef0 PKRU: 55555554 Call Trace: lookup_fast+0x9f/0x100 walk_component+0x1f/0x150 link_path_walk+0x20e/0x3d0 path_lookupat+0x68/0x180 filename_lookup+0xdc/0x1e0 vfs_statx+0x6c/0x140 vfs_fstatat+0x67/0xa0 __do_sys_newfstatat+0x24/0x60 do_syscall_64+0x6a/0x230 entry_SYSCALL_64_after_hwframe+0x76/0x7e This is reachable with reused cached negative dentries. A Ceph lookup or atomic_open can be handed a negative dentry that is already hashed, and fs/ceph/dir.c then hits one of two paths that incorrectly assume "negative" also means "unhashed": - ceph_finish_lookup(): MDS reply is -ENOENT with no trace -> d_add(dentry, NULL) - ceph_lookup(): local ENOENT fast path for a complete directory with shared caps -> d_add(dentry, NULL) Both paths can therefore re-add an already-hashed negative dentry. Ceph already uses the correct pattern elsewhere: ceph_fill_trace() only calls d_add(dn, NULL) for a negative null-dentry reply when d_unhashed(dn) is true. Fix both fs/ceph/dir.c sites the same way: only call d_add() for a negative dentry when it is actually unhashed. If the negative dentry is already hashed, leave it in place and reuse it as-is. 
This preserves the existing behavior for unhashed dentries while avoiding d_hash list corruption for reused hashed negatives. Cc: stable@vger.kernel.org Fixes: 2817b000b02c ("ceph: directory operations") Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 45e5edecc0cbc..66e592c47e57a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -769,7 +769,8 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, d_drop(dentry); err = -ENOENT; } else { - d_add(dentry, NULL); + if (d_unhashed(dentry)) + d_add(dentry, NULL); } } } @@ -840,7 +841,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&ci->i_ceph_lock); doutc(cl, " dir %llx.%llx complete, -ENOENT\n", ceph_vinop(dir)); - d_add(dentry, NULL); + if (d_unhashed(dentry)) + d_add(dentry, NULL); di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; } From 4db061a405435c35c2be9087ea3958eed95bb42a Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 17 Apr 2026 06:54:08 +0100 Subject: [PATCH 237/554] gtp: disable BH before calling udp_tunnel_xmit_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5638504a2aa9e1b9d72af9060df1a160cce2d379 upstream. gtp_genl_send_echo_req() runs as a generic netlink doit handler in process context with BH not disabled. It calls udp_tunnel_xmit_skb(), which eventually invokes iptunnel_xmit() — that uses __this_cpu_inc/dec on softnet_data.xmit.recursion to track the tunnel xmit recursion level. Without local_bh_disable(), the task may migrate between dev_xmit_recursion_inc() and dev_xmit_recursion_dec(), breaking the per-CPU counter pairing. The result is stale or negative recursion levels that can later produce false-positive SKB_DROP_REASON_RECURSION_LIMIT drops on either CPU. 
The other udp_tunnel_xmit_skb() call sites in gtp.c are unaffected: the data path runs under ndo_start_xmit and the echo response handlers run from the UDP encap rx softirq, both with BH already disabled. Fix it by disabling BH around the udp_tunnel_xmit_skb() call, mirroring commit 2cd7e6971fc2 ("sctp: disable BH before calling udp_tunnel_xmit_skb()"). Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260417055408.4667-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5cb59d72bc820..2f8626c3824d6 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -2400,6 +2400,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info) return -ENODEV; } + local_bh_disable(); udp_tunnel_xmit_skb(rt, sk, skb_to_send, fl4.saddr, fl4.daddr, inet_dscp_to_dsfield(fl4.flowi4_dscp), @@ -2409,6 +2410,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info) !net_eq(sock_net(sk), dev_net(gtp->dev)), false, 0); + local_bh_enable(); return 0; } From 9f4ba5966c7816fb8d2e13c3c16491d5b95505d9 Mon Sep 17 00:00:00 2001 From: Chen Zhao Date: Sun, 5 Apr 2026 18:44:55 +0300 Subject: [PATCH 238/554] IB/core: Fix zero dmac race in neighbor resolution commit 5e6de34d82b49cab9d8a42063e9cd0f22a4f31e5 upstream. dst_fetch_ha() checks nud_state without holding the neighbor lock, then copies ha under the seqlock. A race in __neigh_update() where nud_state is set to NUD_REACHABLE before ha is written allows dst_fetch_ha() to read a zero MAC address while the seqlock reports no concurrent writer. netevent_callback amplifies this by waking ALL pending addr_req workers when ANY neighbor becomes NUD_VALID. 
At scale (N peers resolving ARP concurrently), the hit probability scales as N^2, making it near-certain for large RDMA workloads. N(A): neigh_update(A) W(A): addr_resolve(A) | [sleep] | write_lock_bh(&A->lock) | | A->nud_state = NUD_REACHABLE | | // A->ha is still 0 | | [woken by netevent_cb() of | another neighbour] | | dst_fetch_ha(A) | | A->nud_state & NUD_VALID | | read_seqbegin(&A->ha_lock) | | snapshot = A->ha /* 0 */ | | read_seqretry(&A->ha_lock) | | return snapshot | seqlock(&A->ha_lock) | A->ha = mac_A /* too late */ | sequnlock(&A->ha_lock) | write_unlock_bh(&A->lock) The incorrect/zero mac is read and programmed in the device QP while it was not yet updated. This causes silent packet loss and eventual RETRY_EXC_ERR. Fix by holding the neighbor read lock across the nud_state check and ha copy in dst_fetch_ha(), ensuring it synchronizes with __neigh_update() which is updating while holding the write lock. Cc: stable@vger.kernel.org Fixes: 92ebb6a0a13a ("IB/cm: Remove now useless rcu_lock in dst_fetch_ha") Link: https://patch.msgid.link/r/20260405-fix-dmac-race-v1-1-cfa1ec2ce54a@nvidia.com Signed-off-by: Chen Zhao Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 35ba852a172aa..d2a075a781ce6 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -321,11 +321,14 @@ static int dst_fetch_ha(const struct dst_entry *dst, if (!n) return -ENODATA; + read_lock_bh(&n->lock); if (!(n->nud_state & NUD_VALID)) { + read_unlock_bh(&n->lock); neigh_event_send(n, NULL); ret = -ENODATA; } else { neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); + read_unlock_bh(&n->lock); } neigh_release(n); From fc8d81f189f080ab6679f6c1b825319536905230 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2026 
14:24:26 -0400 Subject: [PATCH 239/554] ktest: Fix the month in the name of the failure directory commit 768059ede35f197575a38b10797b52402d9d4d2f upstream. The Perl localtime() function returns the month starting at 0 not 1. This caused the date produced to create the directory for saving files of a failed run to have the month off by one. machine-test-useconfig-fail-20260314073628 The above happened in April, not March. The correct name should have been: machine-test-useconfig-fail-20260414073628 This was somewhat confusing. Cc: stable@vger.kernel.org Cc: John 'Warthog9' Hawley Link: https://patch.msgid.link/20260420142426.33ad0293@fedora Fixes: 7faafbd69639b ("ktest: Add open and close console and start stop monitor") Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 001c4df9f7df6..88de775097fef 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1815,7 +1815,7 @@ sub save_logs { my ($result, $basedir) = @_; my @t = localtime; my $date = sprintf "%04d%02d%02d%02d%02d%02d", - 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1],$t[0]; my $type = $build_type; if ($type =~ /useconfig/) { From 83e31039e0d7f779f059a7873a8afc6fbe2ee6c3 Mon Sep 17 00:00:00 2001 From: Tushar Sariya Date: Sat, 4 Apr 2026 11:58:03 -0230 Subject: [PATCH 240/554] NFSv4.1: Apply session size limits on clone path commit 8c787b286f39c7584440b97b92f87cbe934c13ff upstream. nfs4_clone_server() builds a child nfs_server for same-server automounted submounts but never calls nfs4_session_limit_rwsize() or nfs4_session_limit_xasize() after nfs_clone_server(). This means the child mount can end up with rsize/wsize values that exceed the negotiated session channel limits, causing NFS4ERR_REQ_TOO_BIG and EIO on servers that enforce tight max_request_size budgets. 
Top-level mounts go through nfs4_server_common_setup() which calls these limiters after nfs_probe_server(). Apply the same clamping on the clone path for consistency. Fixes: 2b092175f5e3 ("NFS: Fix inheritance of the block sizes when automounting") Cc: stable@vger.kernel.org Signed-off-by: Tushar Sariya Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs4client.c | 4 ++-- fs/nfs/nfs4proc.c | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2e596244799f3..61216b30cb93f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -253,6 +253,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, u32 minor_version); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); +extern void nfs4_session_limit_rwsize(struct nfs_server *server); +extern void nfs4_session_limit_xasize(struct nfs_server *server); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4ff0e9dd1145e..495cace1e5646 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1033,7 +1033,7 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client); * Limit the mount rsize, wsize and dtsize using negotiated fore * channel attributes. */ -static void nfs4_session_limit_rwsize(struct nfs_server *server) +void nfs4_session_limit_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 struct nfs4_session *sess; @@ -1058,7 +1058,7 @@ static void nfs4_session_limit_rwsize(struct nfs_server *server) /* * Limit xattr sizes using the channel attributes. 
*/ -static void nfs4_session_limit_xasize(struct nfs_server *server) +void nfs4_session_limit_xasize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_2 struct nfs4_session *sess; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3745c59f0af25..711812d839909 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -11000,6 +11000,9 @@ static struct nfs_server *nfs4_clone_server(struct nfs_server *source, if (IS_ERR(server)) return server; + nfs4_session_limit_rwsize(server); + nfs4_session_limit_xasize(server); + error = nfs4_delegation_hash_alloc(server); if (error) { nfs_free_server(server); From d3012690a7065d9ca86521a525ad11e8af491d45 Mon Sep 17 00:00:00 2001 From: Tobias Gaertner Date: Sun, 29 Mar 2026 04:17:02 -0700 Subject: [PATCH 241/554] ntfs3: add buffer boundary checks to run_unpack() commit b62567bca47408e6739dee75f02a2113548af875 upstream. run_unpack() checks `run_buf < run_last` at the top of the while loop but then reads size_size and offset_size bytes via run_unpack_s64() without verifying they fit within the remaining buffer. A crafted NTFS image with truncated run data in an MFT attribute triggers an OOB heap read of up to 15 bytes when the filesystem is mounted. Add boundary checks before each run_unpack_s64() call to ensure the declared field size does not exceed the remaining buffer. Found by fuzzing with a source-patched harness (LibAFL + QEMU). 
Fixes: 82cae269cfa95 ("fs/ntfs3: Add initialization of super block") Cc: stable@vger.kernel.org Signed-off-by: Tobias Gaertner Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/run.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index 21decc2922823..087c13a60c205 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -963,6 +963,9 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, if (size_size > sizeof(len)) return -EINVAL; + if (run_buf + size_size > run_last) + return -EINVAL; + len = run_unpack_s64(run_buf, size_size, 0); /* Skip size_size. */ run_buf += size_size; @@ -975,6 +978,9 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, else if (offset_size <= sizeof(s64)) { s64 dlcn; + if (run_buf + offset_size > run_last) + return -EINVAL; + /* Initial value of dlcn is -1 or 0. */ dlcn = (run_buf[offset_size - 1] & 0x80) ? (s64)-1 : 0; dlcn = run_unpack_s64(run_buf, offset_size, dlcn); From f1af27cec07a9fd0847166bdb23c99e86b05bfdc Mon Sep 17 00:00:00 2001 From: Tobias Gaertner Date: Sun, 29 Mar 2026 04:17:03 -0700 Subject: [PATCH 242/554] ntfs3: fix integer overflow in run_unpack() volume boundary check commit 984a415f019536ea2d24de9010744e5302a9a948 upstream. The volume boundary check `lcn + len > sbi->used.bitmap.nbits` uses raw addition which can wrap around for large lcn and len values, bypassing the validation. Use check_add_overflow() as is already done for the adjacent prev_lcn + dlcn and vcn64 + len checks added by commit 3ac37e100385 ("ntfs3: Fix integer overflow in run_unpack()"). Found by fuzzing with a source-patched harness (LibAFL + QEMU). 
Fixes: 82cae269cfa95 ("fs/ntfs3: Add initialization of super block") Cc: stable@vger.kernel.org Signed-off-by: Tobias Gaertner Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/run.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index 087c13a60c205..de9081be462e5 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -1020,9 +1020,15 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, return -EOPNOTSUPP; } #endif - if (lcn != SPARSE_LCN64 && lcn + len > sbi->used.bitmap.nbits) { - /* LCN range is out of volume. */ - return -EINVAL; + if (lcn != SPARSE_LCN64) { + u64 lcn_end; + + if (check_add_overflow(lcn, len, &lcn_end)) + return -EINVAL; + if (lcn_end > sbi->used.bitmap.nbits) { + /* LCN range is out of volume. */ + return -EINVAL; + } } if (!run) From 3fb7394a837740770f0d6b4b30567e60786a63f2 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Wed, 8 Apr 2026 16:46:00 +0800 Subject: [PATCH 243/554] rtmutex: Use waiter::task instead of current in remove_waiter() commit 3bfdc63936dd4773109b7b8c280c0f3b5ae7d349 upstream. remove_waiter() is used by the slowlock paths, but it is also used for proxy-lock rollback in rt_mutex_start_proxy_lock() when invoked from futex_requeue(). In the latter case waiter::task is not current, but remove_waiter() operates on current for the dequeue operation. That results in several problems: 1) the rbtree dequeue happens without waiter::task::pi_lock being held 2) the waiter task's pi_blocked_on state is not cleared, which leaves a dangling pointer primed for UAF around. 3) rt_mutex_adjust_prio_chain() operates on the wrong top priority waiter task Use waiter::task instead of current in all related operations in remove_waiter() to cure those problems. 
[ tglx: Fixup rt_mutex_adjust_prio_chain(), add a comment and amend the changelog ] Fixes: 8161239a8bcc ("rtmutex: Simplify PI algorithm and make highest prio task get lock") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Keenan Dong Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rtmutex.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index c80902eacd797..e6c6dd0868873 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1535,20 +1535,23 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * * Must be called with lock->wait_lock held and interrupts disabled. It must * have just failed to try_to_take_rt_mutex(). + * + * When invoked from rt_mutex_start_proxy_lock() waiter::task != current ! */ static void __sched remove_waiter(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); + struct task_struct *waiter_task = waiter->task; struct rt_mutex_base *next_lock; lockdep_assert_held(&lock->wait_lock); - raw_spin_lock(¤t->pi_lock); - rt_mutex_dequeue(lock, waiter); - current->pi_blocked_on = NULL; - raw_spin_unlock(¤t->pi_lock); + scoped_guard(raw_spinlock, &waiter_task->pi_lock) { + rt_mutex_dequeue(lock, waiter); + waiter_task->pi_blocked_on = NULL; + } /* * Only update priority if the waiter was the highest priority @@ -1584,7 +1587,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock, raw_spin_unlock_irq(&lock->wait_lock); rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, - next_lock, NULL, current); + next_lock, NULL, waiter_task); raw_spin_lock_irq(&lock->wait_lock); } From 43222ac484f93b3ec2d240a7575e1cedd31f5fa4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Apr 2026 
17:14:34 +0100 Subject: [PATCH 244/554] rxgk: Fix potential integer overflow in length check commit 6929350080f4da292d111a3b33e53138fee51cec upstream. Fix potential integer overflow in rxgk_extract_token() when checking the length of the ticket. Rather than rounding up the value to be tested (which might overflow), round down the size of the available data. Fixes: 2429a1976481 ("rxrpc: Fix untrusted unsigned subtract") Closes: https://sashiko.dev/#/patchset/20260408121252.2249051-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260422161438.2593376-6-dhowells@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/rxgk_app.c | 2 +- net/rxrpc/rxgk_common.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c index 30b6701a98670..0ef2a29eb6958 100644 --- a/net/rxrpc/rxgk_app.c +++ b/net/rxrpc/rxgk_app.c @@ -214,7 +214,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, ticket_len = ntohl(container.token_len); ticket_offset = token_offset + sizeof(container); - if (xdr_round_up(ticket_len) > token_len - sizeof(container)) + if (ticket_len > xdr_round_down(token_len - sizeof(container))) goto short_packet; _debug("KVNO %u", kvno); diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h index 80164d89e19c0..1e257d7ab8ec1 100644 --- a/net/rxrpc/rxgk_common.h +++ b/net/rxrpc/rxgk_common.h @@ -34,6 +34,7 @@ struct rxgk_context { }; #define xdr_round_up(x) (round_up((x), sizeof(__be32))) +#define xdr_round_down(x) (round_down((x), sizeof(__be32))) #define xdr_object_len(x) (4 + xdr_round_up(x)) /* From 9da6e314ea5fa246b8c1906b7422537175968f8e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 25 Mar 2026 22:21:00 +0100 Subject: [PATCH 245/554] sched_ext: Documentation: Clarify ops.dispatch() role in task 
lifecycle commit a313357a346839d40b3a4dec393c71bf30cbb34c upstream. ops.dispatch() is invoked when a CPU becomes available. This can occur when a task voluntarily yields the CPU, exhausts its time slice, or is preempted for other reasons. If the task is still runnable, refilling its time slice in ops.dispatch() (either by the BPF scheduler or the sched_ext core) allows it to continue running without triggering ops.stopping(). However, this behavior is not clearly reflected in the current task lifecycle diagram. Update the diagram to better represent this interaction. Fixes: 9465f44d2df2 ("sched_ext: Documentation: Clarify time slice handling in task lifecycle") Cc: stable@vger.kernel.org # v6.17+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- Documentation/scheduler/sched-ext.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index 404fe6126a769..a72461b1c9305 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst @@ -321,13 +321,15 @@ by a sched_ext scheduler: ops.dispatch(); /* Task is moved to a local DSQ */ } ops.running(); /* Task starts running on its assigned CPU */ - while (task->scx.slice > 0 && task is runnable) - ops.tick(); /* Called every 1/HZ seconds */ - ops.stopping(); /* Task stops running (time slice expires or wait) */ - /* Task's CPU becomes available */ + while task_is_runnable(p) { + while (task->scx.slice > 0 && task_is_runnable(p)) + ops.tick(); /* Called every 1/HZ seconds */ + + ops.dispatch(); /* task->scx.slice can be refilled */ + } - ops.dispatch(); /* task->scx.slice can be refilled */ + ops.stopping(); /* Task stops running (time slice expires or wait) */ } ops.quiescent(); /* Task releases its assigned CPU (wait) */ From 13e550fbfccdb311e76ec96892dfe35f0dba0657 Mon Sep 17 00:00:00 2001 From: Yang Xiuwei Date: Mon, 30 Mar 2026 09:49:52 +0800 
Subject: [PATCH 246/554] scsi: sd: fix missing put_disk() when device_add(&disk_dev) fails commit 1e111c4b3a726df1254670a5cc4868cedb946d37 upstream. If device_add(&sdkp->disk_dev) fails, put_device() runs scsi_disk_release(), which frees the scsi_disk but leaves the gendisk referenced. The device_add_disk() error path in sd_probe() calls put_disk(gd); call put_disk(gd) here to mirror that cleanup. Fixes: 265dfe8ebbab ("scsi: sd: Free scsi_disk device via put_device()") Cc: stable@vger.kernel.org Reviewed-by: John Garry Signed-off-by: Yang Xiuwei Link: https://patch.msgid.link/20260330014952.152776-1-yangxiuwei@kylinos.cn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0252d3f6bed17..072d4c4add334 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3979,6 +3979,7 @@ static int sd_probe(struct device *dev) error = device_add(&sdkp->disk_dev); if (error) { put_device(&sdkp->disk_dev); + put_disk(gd); goto out; } From bbc4bfae9e432af8afd8ca38a01b6dd95b334029 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 18 Apr 2026 18:28:38 +0200 Subject: [PATCH 247/554] seg6: fix seg6 lwtunnel output redirect for L2 reduced encap mode commit ade67d5f588832c7ba131aadd4215a94ce0a15c8 upstream. When SEG6_IPTUN_MODE_L2ENCAP_RED (L2ENCAP_RED) was introduced, the condition in seg6_build_state() that excludes L2 encap modes from setting LWTUNNEL_STATE_OUTPUT_REDIRECT was not updated to account for the new mode. As a consequence, L2ENCAP_RED routes incorrectly trigger seg6_output() on the output path, where the packet is silently dropped because skb_mac_header_was_set() fails on L3 packets. Extend the check to also exclude L2ENCAP_RED, consistent with L2ENCAP. 
Fixes: 13f0296be8ec ("seg6: add support for SRv6 H.L2Encaps.Red behavior") Cc: stable@vger.kernel.org Signed-off-by: Andrea Mayer Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260418162838.31979-1-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_iptunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d6a0f7df90807..351a0ed7441cc 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -715,7 +715,8 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, newts->type = LWTUNNEL_ENCAP_SEG6; newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; - if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) + if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP && + tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP_RED) newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; newts->headroom = seg6_lwt_headroom(tuninfo); From 6f977b0472f700e4bcb981342ceacf2f1b1a3671 Mon Sep 17 00:00:00 2001 From: Anthony Yznaga Date: Tue, 28 Apr 2026 16:05:33 -0400 Subject: [PATCH 248/554] mm: prevent droppable mappings from being locked [ Upstream commit d239462787b072c78eb19fc1f155c3d411256282 ] Droppable mappings must not be lockable. There is a check for VMAs with VM_DROPPABLE set in mlock_fixup() along with checks for other types of unlockable VMAs which ensures this when calling mlock()/mlock2(). For mlockall(MCL_FUTURE), the check for unlockable VMAs is different. In apply_mlockall_flags(), if the flags parameter has MCL_FUTURE set, the current task's mm's default VMA flag field mm->def_flags has VM_LOCKED applied to it. VM_LOCKONFAULT is also applied if MCL_ONFAULT is also set. When these flags are set as default in this manner they are cleared in __mmap_complete() for new mappings that do not support mlock. A check for VM_DROPPABLE in __mmap_complete() is missing resulting in droppable mappings created with VM_LOCKED set. 
To fix this and reduce that chance of similar bugs in the future, introduce and use vma_supports_mlock(). Link: https://lkml.kernel.org/r/20260310155821.17869-1-anthony.yznaga@oracle.com Fixes: 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") Signed-off-by: Anthony Yznaga Suggested-by: David Hildenbrand Acked-by: David Hildenbrand (Arm) Reviewed-by: Pedro Falcato Reviewed-by: Lorenzo Stoakes (Oracle) Tested-by: Lorenzo Stoakes (Oracle) Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton [ added const to is_vm_hugetlb_page and stubbed vma_supports_mlock in vma_internal.h instead of the split-out stubs.h ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb_inline.h | 4 ++-- mm/internal.h | 10 ++++++++++ mm/mlock.c | 10 ++++++---- mm/vma.c | 4 +--- tools/testing/vma/vma_internal.h | 7 ++++++- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 0660a03d37d98..846185ea626c7 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -6,14 +6,14 @@ #include -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma) { return !!(vma->vm_flags & VM_HUGETLB); } #else -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma) { return false; } diff --git a/mm/internal.h b/mm/internal.h index 4cf6a12ff491d..f5c0d927924d0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1129,6 +1129,16 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, } return fpin; } + +static inline bool vma_supports_mlock(const struct vm_area_struct *vma) +{ + if (vma->vm_flags & (VM_SPECIAL | VM_DROPPABLE)) + return 
false; + if (vma_is_dax(vma) || is_vm_hugetlb_page(vma)) + return false; + return vma != get_gate_vma(current->mm); +} + #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } diff --git a/mm/mlock.c b/mm/mlock.c index bb0776f5ef7ca..f59c6d8d376ff 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -472,10 +472,12 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, int ret = 0; vm_flags_t oldflags = vma->vm_flags; - if (newflags == oldflags || (oldflags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || - vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) - /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ + if (newflags == oldflags || vma_is_secretmem(vma) || + !vma_supports_mlock(vma)) + /* + * Don't set VM_LOCKED or VM_LOCKONFAULT and don't count. + * For secretmem, don't allow the memory to be unlocked. + */ goto out; vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); diff --git a/mm/vma.c b/mm/vma.c index 5815ae9e57703..eeb6a187c3d8b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2571,9 +2571,7 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) vm_stat_account(mm, vma->vm_flags, map->pglen); if (vm_flags & VM_LOCKED) { - if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || - is_vm_hugetlb_page(vma) || - vma == get_gate_vma(mm)) + if (!vma_supports_mlock(vma)) vm_flags_clear(vma, VM_LOCKED_MASK); else mm->locked_vm += map->pglen; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index dc976a285ad2c..9f724954a0f6b 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -989,7 +989,12 @@ static inline bool mapping_can_writeback(struct address_space *mapping) return true; } -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(const struct 
vm_area_struct *vma) +{ + return false; +} + +static inline bool vma_supports_mlock(const struct vm_area_struct *vma) { return false; } From 5e07126d7ab8f22ae88dffda7498bf63e75ec676 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 28 Apr 2026 10:32:37 -0400 Subject: [PATCH 249/554] arm64: mm: Simplify check in arch_kfence_init_pool() [ Upstream commit b7737c38e7cb611c2fbd87af3b09afeb92c96fe7 ] TL;DR: checking force_pte_mapping() in arch_kfence_init_pool() is sufficient Commit ce2b3a50ad92 ("arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context") recently added an arm64 implementation of arch_kfence_init_pool() to ensure that the KFENCE pool is PTE-mapped. Assuming that the pool was not initialised early, block splitting is necessary if the linear mapping is not fully PTE-mapped, in other words if force_pte_mapping() is false. arch_kfence_init_pool() currently makes another check: whether BBML2-noabort is supported, i.e. whether we are *able* to split block mappings. This check is however unnecessary, because force_pte_mapping() is always true if KFENCE is enabled and BBML2-noabort is not supported. This must be the case by design, since KFENCE requires PTE-mapped pages in all cases. We can therefore remove that check. The situation is different in split_kernel_leaf_mapping(), as that function is called unconditionally regardless of the configuration. If BBML2-noabort is not supported, it cannot do anything and bails out. If force_pte_mapping() is true, there is nothing to do and it also bails out, but these are independent checks. Commit 53357f14f924 ("arm64: mm: Tidy up force_pte_mapping()") grouped these checks into a helper, split_leaf_mapping_possible(). This isn't so helpful as only split_kernel_leaf_mapping() should check both. Revert the parts of that commit that introduced the helper, reintroducing the more accurate comments in split_kernel_leaf_mapping(). 
Signed-off-by: Kevin Brodsky Reviewed-by: Ryan Roberts Signed-off-by: Catalin Marinas Stable-dep-of: f12b435de2f2 ("arm64: mm: Fix rodata=full block mapping support for realm guests") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 75698900c17df..65b73fd012282 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -773,18 +773,6 @@ static inline bool force_pte_mapping(void) return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); } -static inline bool split_leaf_mapping_possible(void) -{ - /* - * !BBML2_NOABORT systems should never run into scenarios where we would - * have to split. So exit early and let calling code detect it and raise - * a warning. - */ - if (!system_supports_bbml2_noabort()) - return false; - return !force_pte_mapping(); -} - static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -792,11 +780,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * Exit early if the region is within a pte-mapped area or if we can't - * split. For the latter case, the permission change code will raise a - * warning if not already pte-mapped. + * !BBML2_NOABORT systems should not be trying to change permissions on + * anything that is not pte-mapped in the first place. Just return early + * and let the permission change code raise a warning if not already + * pte-mapped. */ - if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) + if (!system_supports_bbml2_noabort()) + return 0; + + /* + * If the region is within a pte-mapped area, there is no need to try to + * split. 
Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may + * change permissions from atomic context so for those cases (which are + * always pte-mapped), we must not go any further because taking the + * mutex below may sleep. + */ + if (force_pte_mapping() || is_kfence_address((void *)start)) return 0; /* @@ -1095,7 +1094,7 @@ bool arch_kfence_init_pool(void) int ret; /* Exit early if we know the linear map is already pte-mapped. */ - if (!split_leaf_mapping_possible()) + if (force_pte_mapping()) return true; /* Kfence pool is already pte-mapped for the early init case. */ From 1e67c82fb7781d6af2ff999ec206515820bd9457 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 28 Apr 2026 10:32:38 -0400 Subject: [PATCH 250/554] arm64: mm: Fix rodata=full block mapping support for realm guests [ Upstream commit f12b435de2f2bb09ce406467020181ada528844c ] Commit a166563e7ec37 ("arm64: mm: support large block mapping when rodata=full") enabled the linear map to be mapped by block/cont while still allowing granular permission changes on BBML2_NOABORT systems by lazily splitting the live mappings. This mechanism was intended to be usable by realm guests since they need to dynamically share dma buffers with the host by "decrypting" them - which for Arm CCA, means marking them as shared in the page tables. However, it turns out that the mechanism was failing for realm guests because realms need to share their dma buffers (via __set_memory_enc_dec()) much earlier during boot than split_kernel_leaf_mapping() was able to handle. The report linked below showed that GIC's ITS was one such user. But during the investigation I found other callsites that could not meet the split_kernel_leaf_mapping() constraints. The problem is that we block map the linear map based on the boot CPU supporting BBML2_NOABORT, then check that all the other CPUs support it too when finalizing the caps. If they don't, then we stop_machine() and split to ptes. 
For safety, split_kernel_leaf_mapping() previously wouldn't permit splitting until after the caps were finalized. That ensured that if any secondary cpus were running that didn't support BBML2_NOABORT, we wouldn't risk breaking them. I've fixed this problem by reducing the black-out window where we refuse to split; there are now 2 windows. The first is from T0 until the page allocator is initialized. Splitting allocates memory from the page allocator so it must be in use. The second covers the period between starting to online the secondary cpus until the system caps are finalized (this is a very small window). All of the problematic callers are calling __set_memory_enc_dec() before the secondary cpus come online, so this solves the problem. However, one of these callers, swiotlb_update_mem_attributes(), was trying to split before the page allocator was initialized. So I have moved this call from arch_mm_preinit() to mem_init(), which solves the ordering issue. I've added warnings and return an error if any attempt is made to split in the black-out windows. Note there are other issues which prevent booting all the way to user space, which will be fixed in subsequent patches. 
Reported-by: Jinjiang Tu Closes: https://lore.kernel.org/all/0b2a4ae5-fc51-4d77-b177-b2e9db74f11d@huawei.com/ Fixes: a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full") Cc: stable@vger.kernel.org Reviewed-by: Kevin Brodsky Signed-off-by: Ryan Roberts Reviewed-by: Suzuki K Poulose Tested-by: Suzuki K Poulose Signed-off-by: Catalin Marinas [ adjusted context to use `__ASSEMBLY__` instead of `__ASSEMBLER__` ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 2 ++ arch/arm64/mm/init.c | 9 +++++++- arch/arm64/mm/mmu.c | 45 +++++++++++++++++++++++++----------- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 78a4dbf75e602..c5d187769c6c9 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -112,5 +112,7 @@ void kpti_install_ng_mappings(void); static inline void kpti_install_ng_mappings(void) {} #endif +extern bool page_alloc_available; + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 524d34a0e9219..341ae4f15845e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -357,7 +357,6 @@ void __init arch_mm_preinit(void) } swiotlb_init(swiotlb, flags); - swiotlb_update_mem_attributes(); /* * Check boundaries twice: Some fundamental inconsistencies can be @@ -384,6 +383,14 @@ void __init arch_mm_preinit(void) } } +bool page_alloc_available __ro_after_init; + +void __init mem_init(void) +{ + page_alloc_available = true; + swiotlb_update_mem_attributes(); +} + void free_initmem(void) { void *lm_init_begin = lm_alias(__init_begin); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 65b73fd012282..56be3d8a459c6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -774,30 +774,51 @@ static inline bool force_pte_mapping(void) } static DEFINE_MUTEX(pgtable_split_lock); +static bool linear_map_requires_bbml2; int 
split_kernel_leaf_mapping(unsigned long start, unsigned long end) { int ret; - /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. - */ - if (!system_supports_bbml2_noabort()) - return 0; - /* * If the region is within a pte-mapped area, there is no need to try to * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may * change permissions from atomic context so for those cases (which are * always pte-mapped), we must not go any further because taking the - * mutex below may sleep. + * mutex below may sleep. Do not call force_pte_mapping() here because + * it could return a confusing result if called from a secondary cpu + * prior to finalizing caps. Instead, linear_map_requires_bbml2 gives us + * what we need. */ - if (force_pte_mapping() || is_kfence_address((void *)start)) + if (!linear_map_requires_bbml2 || is_kfence_address((void *)start)) return 0; + if (!system_supports_bbml2_noabort()) { + /* + * !BBML2_NOABORT systems should not be trying to change + * permissions on anything that is not pte-mapped in the first + * place. Just return early and let the permission change code + * raise a warning if not already pte-mapped. + */ + if (system_capabilities_finalized()) + return 0; + + /* + * Boot-time: split_kernel_leaf_mapping_locked() allocates from + * page allocator. Can't split until it's available. + */ + if (WARN_ON(!page_alloc_available)) + return -EBUSY; + + /* + * Boot-time: Started secondary cpus but don't know if they + * support BBML2_NOABORT yet. Can't allow splitting in this + * window in case they don't. + */ + if (WARN_ON(num_online_cpus() > 1)) + return -EBUSY; + } + /* * Ensure start and end are at least page-aligned since this is the * finest granularity we can split to. 
@@ -897,8 +918,6 @@ static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp return ret; } -static bool linear_map_requires_bbml2 __initdata; - u32 idmap_kpti_bbml2_flag; static void __init init_idmap_kpti_bbml2_flag(void) From 38f113f81d3f0adc658a4475dd3ecaec985e21d3 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 28 Apr 2026 10:31:11 -0400 Subject: [PATCH 251/554] lib: test_hmm: evict device pages on file close to avoid use-after-free [ Upstream commit 744dd97752ef1076a8d8672bb0d8aa2c7abc1144 ] Patch series "Minor hmm_test fixes and cleanups". Two bugfixes and a cleanup for the HMM kernel selftests. These were mostly reported by Zenghui Yu with special thanks to Lorenzo for analysing and pointing out the problems. This patch (of 3): When dmirror_fops_release() is called it frees the dmirror struct but doesn't migrate device private pages back to system memory first. This leaves those pages with a dangling zone_device_data pointer to the freed dmirror. If a subsequent fault occurs on those pages (eg. during coredump) the dmirror_devmem_fault() callback dereferences the stale pointer causing a kernel panic. This was reported [1] when running mm/ksft_hmm.sh on arm64, where a test failure triggered SIGABRT and the resulting coredump walked the VMAs faulting in the stale device private pages. Fix this by calling dmirror_device_evict_chunk() for each devmem chunk in dmirror_fops_release() to migrate all device private pages back to system memory before freeing the dmirror struct. The function is moved earlier in the file to avoid a forward declaration. 
Link: https://lore.kernel.org/20260331063445.3551404-1-apopple@nvidia.com Link: https://lore.kernel.org/20260331063445.3551404-2-apopple@nvidia.com Fixes: b2ef9f5a5cb3 ("mm/hmm/test: add selftest driver for HMM") Signed-off-by: Alistair Popple Reported-by: Zenghui Yu Closes: https://lore.kernel.org/linux-mm/8bd0396a-8997-4d2e-a13f-5aac033083d7@linux.dev/ Reviewed-by: Balbir Singh Tested-by: Zenghui Yu Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Zenghui Yu Cc: Matthew Brost Cc: Signed-off-by: Andrew Morton [ kept the existing simpler `dmirror_device_evict_chunk()` body instead of the upstream compound-folio version ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/test_hmm.c | 86 ++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 83e3d8208a540..00d34a6c6276b 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -183,11 +183,60 @@ static int dmirror_fops_open(struct inode *inode, struct file *filp) return 0; } +static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk) +{ + unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT; + unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT; + unsigned long npages = end_pfn - start_pfn + 1; + unsigned long i; + unsigned long *src_pfns; + unsigned long *dst_pfns; + + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + + migrate_device_range(src_pfns, start_pfn, npages); + for (i = 0; i < npages; i++) { + struct page *dpage, *spage; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + if (WARN_ON(!is_device_private_page(spage) && + !is_device_coherent_page(spage))) + continue; + spage = 
BACKING_PAGE(spage); + dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL); + lock_page(dpage); + copy_highpage(dpage, spage); + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); + if (src_pfns[i] & MIGRATE_PFN_WRITE) + dst_pfns[i] |= MIGRATE_PFN_WRITE; + } + migrate_device_pages(src_pfns, dst_pfns, npages); + migrate_device_finalize(src_pfns, dst_pfns, npages); + kvfree(src_pfns); + kvfree(dst_pfns); +} + static int dmirror_fops_release(struct inode *inode, struct file *filp) { struct dmirror *dmirror = filp->private_data; + struct dmirror_device *mdevice = dmirror->mdevice; + int i; mmu_interval_notifier_remove(&dmirror->notifier); + + if (mdevice->devmem_chunks) { + for (i = 0; i < mdevice->devmem_count; i++) { + struct dmirror_chunk *devmem = + mdevice->devmem_chunks[i]; + + dmirror_device_evict_chunk(devmem); + } + } + xa_destroy(&dmirror->pt); kfree(dmirror); return 0; @@ -1192,43 +1241,6 @@ static int dmirror_snapshot(struct dmirror *dmirror, return ret; } -static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk) -{ - unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT; - unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT; - unsigned long npages = end_pfn - start_pfn + 1; - unsigned long i; - unsigned long *src_pfns; - unsigned long *dst_pfns; - - src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); - dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); - - migrate_device_range(src_pfns, start_pfn, npages); - for (i = 0; i < npages; i++) { - struct page *dpage, *spage; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - if (WARN_ON(!is_device_private_page(spage) && - !is_device_coherent_page(spage))) - continue; - spage = BACKING_PAGE(spage); - dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL); - lock_page(dpage); - copy_highpage(dpage, spage); - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); - if 
(src_pfns[i] & MIGRATE_PFN_WRITE) - dst_pfns[i] |= MIGRATE_PFN_WRITE; - } - migrate_device_pages(src_pfns, dst_pfns, npages); - migrate_device_finalize(src_pfns, dst_pfns, npages); - kvfree(src_pfns); - kvfree(dst_pfns); -} - /* Removes free pages from the free list so they can't be re-allocated */ static void dmirror_remove_free_pages(struct dmirror_chunk *devmem) { From 09033be12a82b796a4c3b8f1ef8860bda26b745b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 28 Apr 2026 06:12:04 -0400 Subject: [PATCH 252/554] mei: me: use PCI_DEVICE_DATA macro [ Upstream commit 9e7a2409ecf4d411b7cc91615b08f6a7576f0aaa ] Drop old local MEI_PCI_DEVICE macro and use common PCI_DEVICE_DATA instead. Update defines to adhere to current naming convention. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20260201094358.1440593-2-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: a5a1804332af ("mei: me: add nova lake point H DID") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 6 +- drivers/misc/mei/hw-me-regs.h | 162 +++++++++++++------------- drivers/misc/mei/hw-me.h | 6 - drivers/misc/mei/pci-me.c | 208 +++++++++++++++++----------------- 4 files changed, 188 insertions(+), 194 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index e6a1d3534663a..bea7a47d216e0 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -303,9 +303,9 @@ static void mei_wd(struct mei_cl_device *cldev) { struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); - if (pdev->device == MEI_DEV_ID_WPT_LP || - pdev->device == MEI_DEV_ID_SPT || - pdev->device == MEI_DEV_ID_SPT_H) + if (pdev->device == PCI_DEVICE_ID_INTEL_MEI_WPT_LP || + pdev->device == PCI_DEVICE_ID_INTEL_MEI_SPT || + pdev->device == PCI_DEVICE_ID_INTEL_MEI_SPT_H) cldev->me_cl->props.protocol_version = 0x2; cldev->do_match = 1; diff 
--git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index fa30899a5fa26..840e1fd2714c4 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -9,120 +9,120 @@ /* * MEI device IDs */ -#define MEI_DEV_ID_82946GZ 0x2974 /* 82946GZ/GL */ -#define MEI_DEV_ID_82G35 0x2984 /* 82G35 Express */ -#define MEI_DEV_ID_82Q965 0x2994 /* 82Q963/Q965 */ -#define MEI_DEV_ID_82G965 0x29A4 /* 82P965/G965 */ +#define PCI_DEVICE_ID_INTEL_MEI_82946GZ 0x2974 /* 82946GZ/GL */ +#define PCI_DEVICE_ID_INTEL_MEI_82G35 0x2984 /* 82G35 Express */ +#define PCI_DEVICE_ID_INTEL_MEI_82Q965 0x2994 /* 82Q963/Q965 */ +#define PCI_DEVICE_ID_INTEL_MEI_82G965 0x29A4 /* 82P965/G965 */ -#define MEI_DEV_ID_82GM965 0x2A04 /* Mobile PM965/GM965 */ -#define MEI_DEV_ID_82GME965 0x2A14 /* Mobile GME965/GLE960 */ +#define PCI_DEVICE_ID_INTEL_MEI_82GM965 0x2A04 /* Mobile PM965/GM965 */ +#define PCI_DEVICE_ID_INTEL_MEI_82GME965 0x2A14 /* Mobile GME965/GLE960 */ -#define MEI_DEV_ID_ICH9_82Q35 0x29B4 /* 82Q35 Express */ -#define MEI_DEV_ID_ICH9_82G33 0x29C4 /* 82G33/G31/P35/P31 Express */ -#define MEI_DEV_ID_ICH9_82Q33 0x29D4 /* 82Q33 Express */ -#define MEI_DEV_ID_ICH9_82X38 0x29E4 /* 82X38/X48 Express */ -#define MEI_DEV_ID_ICH9_3200 0x29F4 /* 3200/3210 Server */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_82Q35 0x29B4 /* 82Q35 Express */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_82G33 0x29C4 /* 82G33/G31/P35/P31 Express */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_82Q33 0x29D4 /* 82Q33 Express */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_82X38 0x29E4 /* 82X38/X48 Express */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_3200 0x29F4 /* 3200/3210 Server */ -#define MEI_DEV_ID_ICH9_6 0x28B4 /* Bearlake */ -#define MEI_DEV_ID_ICH9_7 0x28C4 /* Bearlake */ -#define MEI_DEV_ID_ICH9_8 0x28D4 /* Bearlake */ -#define MEI_DEV_ID_ICH9_9 0x28E4 /* Bearlake */ -#define MEI_DEV_ID_ICH9_10 0x28F4 /* Bearlake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_6 0x28B4 /* Bearlake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_7 
0x28C4 /* Bearlake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_8 0x28D4 /* Bearlake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_9 0x28E4 /* Bearlake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9_10 0x28F4 /* Bearlake */ -#define MEI_DEV_ID_ICH9M_1 0x2A44 /* Cantiga */ -#define MEI_DEV_ID_ICH9M_2 0x2A54 /* Cantiga */ -#define MEI_DEV_ID_ICH9M_3 0x2A64 /* Cantiga */ -#define MEI_DEV_ID_ICH9M_4 0x2A74 /* Cantiga */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9M_1 0x2A44 /* Cantiga */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9M_2 0x2A54 /* Cantiga */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9M_3 0x2A64 /* Cantiga */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH9M_4 0x2A74 /* Cantiga */ -#define MEI_DEV_ID_ICH10_1 0x2E04 /* Eaglelake */ -#define MEI_DEV_ID_ICH10_2 0x2E14 /* Eaglelake */ -#define MEI_DEV_ID_ICH10_3 0x2E24 /* Eaglelake */ -#define MEI_DEV_ID_ICH10_4 0x2E34 /* Eaglelake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH10_1 0x2E04 /* Eaglelake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH10_2 0x2E14 /* Eaglelake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH10_3 0x2E24 /* Eaglelake */ +#define PCI_DEVICE_ID_INTEL_MEI_ICH10_4 0x2E34 /* Eaglelake */ -#define MEI_DEV_ID_IBXPK_1 0x3B64 /* Calpella */ -#define MEI_DEV_ID_IBXPK_2 0x3B65 /* Calpella */ +#define PCI_DEVICE_ID_INTEL_MEI_IBXPK_1 0x3B64 /* Calpella */ +#define PCI_DEVICE_ID_INTEL_MEI_IBXPK_2 0x3B65 /* Calpella */ -#define MEI_DEV_ID_CPT_1 0x1C3A /* Couger Point */ -#define MEI_DEV_ID_PBG_1 0x1D3A /* C600/X79 Patsburg */ +#define PCI_DEVICE_ID_INTEL_MEI_CPT_1 0x1C3A /* Couger Point */ +#define PCI_DEVICE_ID_INTEL_MEI_PBG_1 0x1D3A /* C600/X79 Patsburg */ -#define MEI_DEV_ID_PPT_1 0x1E3A /* Panther Point */ -#define MEI_DEV_ID_PPT_2 0x1CBA /* Panther Point */ -#define MEI_DEV_ID_PPT_3 0x1DBA /* Panther Point */ +#define PCI_DEVICE_ID_INTEL_MEI_PPT_1 0x1E3A /* Panther Point */ +#define PCI_DEVICE_ID_INTEL_MEI_PPT_2 0x1CBA /* Panther Point */ +#define PCI_DEVICE_ID_INTEL_MEI_PPT_3 0x1DBA /* Panther Point */ -#define MEI_DEV_ID_LPT_H 0x8C3A /* Lynx Point H */ 
-#define MEI_DEV_ID_LPT_W 0x8D3A /* Lynx Point - Wellsburg */ -#define MEI_DEV_ID_LPT_LP 0x9C3A /* Lynx Point LP */ -#define MEI_DEV_ID_LPT_HR 0x8CBA /* Lynx Point H Refresh */ +#define PCI_DEVICE_ID_INTEL_MEI_LPT_H 0x8C3A /* Lynx Point H */ +#define PCI_DEVICE_ID_INTEL_MEI_LPT_W 0x8D3A /* Lynx Point - Wellsburg */ +#define PCI_DEVICE_ID_INTEL_MEI_LPT_LP 0x9C3A /* Lynx Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_LPT_HR 0x8CBA /* Lynx Point H Refresh */ -#define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */ -#define MEI_DEV_ID_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */ +#define PCI_DEVICE_ID_INTEL_MEI_WPT_LP 0x9CBA /* Wildcat Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */ -#define MEI_DEV_ID_SPT 0x9D3A /* Sunrise Point */ -#define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */ -#define MEI_DEV_ID_SPT_3 0x9D3E /* Sunrise Point 3 (iToutch) */ -#define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ -#define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ +#define PCI_DEVICE_ID_INTEL_MEI_SPT 0x9D3A /* Sunrise Point */ +#define PCI_DEVICE_ID_INTEL_MEI_SPT_2 0x9D3B /* Sunrise Point 2 */ +#define PCI_DEVICE_ID_INTEL_MEI_SPT_3 0x9D3E /* Sunrise Point 3 (iToutch) */ +#define PCI_DEVICE_ID_INTEL_MEI_SPT_H 0xA13A /* Sunrise Point H */ +#define PCI_DEVICE_ID_INTEL_MEI_SPT_H_2 0xA13B /* Sunrise Point H 2 */ -#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */ +#define PCI_DEVICE_ID_INTEL_MEI_LBG 0xA1BA /* Lewisburg (SPT) */ -#define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ -#define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define PCI_DEVICE_ID_INTEL_MEI_BXT_M 0x1A9A /* Broxton M */ +#define PCI_DEVICE_ID_INTEL_MEI_APL_I 0x5A9A /* Apollo Lake I */ -#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ +#define PCI_DEVICE_ID_INTEL_MEI_DNV_IE 0x19E5 /* Denverton IE */ -#define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ +#define PCI_DEVICE_ID_INTEL_MEI_GLK 0x319A /* Gemini Lake */ -#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ -#define 
MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ -#define MEI_DEV_ID_KBP_3 0xA2BE /* Kaby Point 3 (iTouch) */ +#define PCI_DEVICE_ID_INTEL_MEI_KBP 0xA2BA /* Kaby Point */ +#define PCI_DEVICE_ID_INTEL_MEI_KBP_2 0xA2BB /* Kaby Point 2 */ +#define PCI_DEVICE_ID_INTEL_MEI_KBP_3 0xA2BE /* Kaby Point 3 (iTouch) */ -#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ -#define MEI_DEV_ID_CNP_LP_3 0x9DE4 /* Cannon Point LP 3 (iTouch) */ -#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ -#define MEI_DEV_ID_CNP_H_3 0xA364 /* Cannon Point H 3 (iTouch) */ +#define PCI_DEVICE_ID_INTEL_MEI_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_CNP_LP_3 0x9DE4 /* Cannon Point LP 3 (iTouch) */ +#define PCI_DEVICE_ID_INTEL_MEI_CNP_H 0xA360 /* Cannon Point H */ +#define PCI_DEVICE_ID_INTEL_MEI_CNP_H_3 0xA364 /* Cannon Point H 3 (iTouch) */ -#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ -#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ +#define PCI_DEVICE_ID_INTEL_MEI_CMP_LP 0x02e0 /* Comet Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ -#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ +#define PCI_DEVICE_ID_INTEL_MEI_CMP_V 0xA3BA /* Comet Point Lake V */ -#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ -#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ +#define PCI_DEVICE_ID_INTEL_MEI_CMP_H 0x06e0 /* Comet Lake H */ +#define PCI_DEVICE_ID_INTEL_MEI_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ -#define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ +#define PCI_DEVICE_ID_INTEL_MEI_CDF 0x18D3 /* Cedar Fork */ -#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ -#define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ +#define PCI_DEVICE_ID_INTEL_MEI_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_ICP_N 0x38E0 /* Ice Lake Point N */ -#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ +#define PCI_DEVICE_ID_INTEL_MEI_JSP_N 0x4DE0 /* Jasper Lake Point N 
*/ -#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ -#define MEI_DEV_ID_TGP_H 0x43E0 /* Tiger Lake Point H */ +#define PCI_DEVICE_ID_INTEL_MEI_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_TGP_H 0x43E0 /* Tiger Lake Point H */ -#define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ -#define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ +#define PCI_DEVICE_ID_INTEL_MEI_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ +#define PCI_DEVICE_ID_INTEL_MEI_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ -#define MEI_DEV_ID_EBG 0x1BE0 /* Emmitsburg WS */ +#define PCI_DEVICE_ID_INTEL_MEI_EBG 0x1BE0 /* Emmitsburg WS */ -#define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ -#define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ -#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ -#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ +#define PCI_DEVICE_ID_INTEL_MEI_ADP_S 0x7AE8 /* Alder Lake Point S */ +#define PCI_DEVICE_ID_INTEL_MEI_ADP_LP 0x7A60 /* Alder Lake Point LP */ +#define PCI_DEVICE_ID_INTEL_MEI_ADP_P 0x51E0 /* Alder Lake Point P */ +#define PCI_DEVICE_ID_INTEL_MEI_ADP_N 0x54E0 /* Alder Lake Point N */ -#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ +#define PCI_DEVICE_ID_INTEL_MEI_RPL_S 0x7A68 /* Raptor Lake Point S */ -#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ -#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ -#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ +#define PCI_DEVICE_ID_INTEL_MEI_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define PCI_DEVICE_ID_INTEL_MEI_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define PCI_DEVICE_ID_INTEL_MEI_ARL_H 0x7770 /* Arrow Lake Point H */ -#define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define PCI_DEVICE_ID_INTEL_MEI_LNL_M 0xA870 /* Lunar Lake Point M */ -#define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ -#define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ +#define PCI_DEVICE_ID_INTEL_MEI_PTL_H 0xE370 /* Panther Lake H 
*/ +#define PCI_DEVICE_ID_INTEL_MEI_PTL_P 0xE470 /* Panther Lake P */ -#define MEI_DEV_ID_WCL_P 0x4D70 /* Wildcat Lake P */ +#define PCI_DEVICE_ID_INTEL_MEI_WCL_P 0x4D70 /* Wildcat Lake P */ -#define MEI_DEV_ID_NVL_S 0x6E68 /* Nova Lake Point S */ +#define PCI_DEVICE_ID_INTEL_MEI_NVL_S 0x6E68 /* Nova Lake Point S */ /* * MEI HW Section diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 204b92af6c478..843ec2497b523 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -33,12 +33,6 @@ struct mei_cfg { u32 hw_trc_supported:1; }; - -#define MEI_PCI_DEVICE(dev, cfg) \ - .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ - .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \ - .driver_data = (kernel_ulong_t)(cfg), - #define MEI_ME_RPM_TIMEOUT 500 /* ms */ /** diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 2a6e569558b94..fe5d5aee074cd 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -26,110 +26,110 @@ /* mei_pci_tbl - PCI Device ID Table */ static const struct pci_device_id mei_me_pci_tbl[] = { - {MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_82G35, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_82G965, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, MEI_ME_ICH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, MEI_ME_ICH_CFG)}, - 
{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, MEI_ME_ICH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, MEI_ME_ICH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, MEI_ME_ICH10_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, MEI_ME_ICH10_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_4_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_4_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_4_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_3, MEI_ME_PCH8_ITOUCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_4_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_4_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_4_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_3, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, 
MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH12_SPS_ITOUCH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_H, MEI_ME_PCH15_SPS_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_EBG, MEI_ME_PCH15_SPS_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_WCL_P, MEI_ME_PCH15_CFG)}, - - {MEI_PCI_DEVICE(MEI_DEV_ID_NVL_S, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82946GZ, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82G35, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82Q965, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82G965, 
MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82GM965, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_82GME965, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_82Q35, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_82G33, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_82Q33, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_82X38, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_3200, MEI_ME_ICH_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_6, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_7, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_8, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_9, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9_10, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9M_1, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9M_2, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9M_3, MEI_ME_ICH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH9M_4, MEI_ME_ICH_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_ICH10_1, MEI_ME_ICH10_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH10_2, MEI_ME_ICH10_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH10_3, MEI_ME_ICH10_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICH10_4, MEI_ME_ICH10_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_IBXPK_1, MEI_ME_PCH6_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_IBXPK_2, MEI_ME_PCH6_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_PPT_1, MEI_ME_PCH7_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_PPT_2, MEI_ME_PCH7_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_PPT_3, MEI_ME_PCH7_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_LPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_LPT_W, MEI_ME_PCH8_SPS_4_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_LPT_LP, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_LPT_HR, MEI_ME_PCH8_SPS_4_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_WPT_LP, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_WPT_LP_2, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_SPT, 
MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_SPT_2, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_SPT_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_SPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_SPT_H_2, MEI_ME_PCH8_SPS_4_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_LBG, MEI_ME_PCH12_SPS_4_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_BXT_M, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_APL_I, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_DNV_IE, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_GLK, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_KBP, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_KBP_2, MEI_ME_PCH8_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_KBP_3, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_CNP_LP, MEI_ME_PCH12_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CNP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CNP_H, MEI_ME_PCH12_SPS_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CNP_H_3, MEI_ME_PCH12_SPS_ITOUCH_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_CMP_LP, MEI_ME_PCH12_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CMP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CMP_V, MEI_ME_PCH12_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CMP_H, MEI_ME_PCH12_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_ICP_LP, MEI_ME_PCH12_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ICP_N, MEI_ME_PCH12_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_TGP_LP, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_TGP_H, MEI_ME_PCH15_SPS_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_JSP_N, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_MCC, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_MCC_4, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_CDF, MEI_ME_PCH8_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_EBG, MEI_ME_PCH15_SPS_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_ADP_S, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ADP_LP, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ADP_P, MEI_ME_PCH15_CFG)}, + 
{PCI_DEVICE_DATA(INTEL, MEI_ADP_N, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_RPL_S, MEI_ME_PCH15_SPS_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_MTL_M, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ARL_S, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_ARL_H, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_LNL_M, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_PTL_H, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_PTL_P, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_WCL_P, MEI_ME_PCH15_CFG)}, + + {PCI_DEVICE_DATA(INTEL, MEI_NVL_S, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } From da4a33e739dc886cdbc63984430b8870dba18ff3 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 28 Apr 2026 06:12:05 -0400 Subject: [PATCH 253/554] mei: me: add nova lake point H DID [ Upstream commit a5a1804332afc7035d5c5b880548262e81d796bc ] Add Nova Lake H device id. Cc: stable Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20260405141758.1634556-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 840e1fd2714c4..5967f95891a1f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -123,6 +123,7 @@ #define PCI_DEVICE_ID_INTEL_MEI_WCL_P 0x4D70 /* Wildcat Lake P */ #define PCI_DEVICE_ID_INTEL_MEI_NVL_S 0x6E68 /* Nova Lake Point S */ +#define PCI_DEVICE_ID_INTEL_MEI_NVL_H 0xD370 /* Nova Lake Point H */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index fe5d5aee074cd..5b6aaa4d3a1cb 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -130,6 +130,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, 
MEI_WCL_P, MEI_ME_PCH15_CFG)}, {PCI_DEVICE_DATA(INTEL, MEI_NVL_S, MEI_ME_PCH15_CFG)}, + {PCI_DEVICE_DATA(INTEL, MEI_NVL_H, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } From 37ef11ab2cf3ce04a92e4a5c1a59a74670c88a24 Mon Sep 17 00:00:00 2001 From: Ronak Raheja Date: Fri, 1 May 2026 09:18:55 -0400 Subject: [PATCH 254/554] phy: qcom: m31-eusb2: Update init sequence to set PHY_ENABLE [ Upstream commit 7044ed6749c8a7d49e67b2f07f42da2f29d26be6 ] Certain platforms may not have the PHY_ENABLE bit set on power on reset. Update the current sequence to explicitly write to enable the PHY_ENABLE bit. This ensures that regardless of the platform, the PHY is properly enabled. Signed-off-by: Ronak Raheja Signed-off-by: Wesley Cheng Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://patch.msgid.link/20250920032158.242725-1-wesley.cheng@oss.qualcomm.com Signed-off-by: Vinod Koul Stable-dep-of: 520a98bdf7ae ("phy: qcom: m31-eusb2: clear PLL_EN during init") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c index 0a0d2d9fc8464..95cd3175926d5 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -25,6 +25,7 @@ #define POR BIT(1) #define USB_PHY_HS_PHY_CTRL_COMMON0 (0x54) +#define PHY_ENABLE BIT(0) #define SIDDQ_SEL BIT(1) #define SIDDQ BIT(2) #define FSEL GENMASK(6, 4) @@ -81,6 +82,7 @@ struct m31_eusb2_priv_data { static const struct m31_phy_tbl_entry m31_eusb2_setup_tbl[] = { M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG0, UTMI_PHY_CMN_CTRL_OVERRIDE_EN, 1), M31_EUSB_PHY_INIT_CFG(USB_PHY_UTMI_CTRL5, POR, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL_COMMON0, PHY_ENABLE, 1), M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG1, PLL_EN, 1), M31_EUSB_PHY_INIT_CFG(USB_PHY_FSEL_SEL, FSEL_SEL, 1), }; From 09e1c96594afb2a13e4090119728545ca9c06dfd Mon 
Sep 17 00:00:00 2001 From: Elson Serrao Date: Fri, 1 May 2026 09:18:56 -0400 Subject: [PATCH 255/554] phy: qcom: m31-eusb2: clear PLL_EN during init [ Upstream commit 520a98bdf7ae0130e22d8adced3d69a2e211b41f ] The driver currently sets bit 0 of USB_PHY_CFG1 (PLL_EN) during PHY initialization. According to the M31 EUSB2 PHY hardware documentation, this bit is intended only for test/debug scenarios and does not control mission mode operation. Keeping PLL_EN asserted causes the PHY to draw additional current during USB bus suspend. Clearing this bit results in lower suspend power consumption without affecting normal operation. Update the driver to leave PLL_EN cleared as recommended by the hardware documentation. Fixes: 9c8504861cc4 ("phy: qcom: Add M31 based eUSB2 PHY driver") Cc: stable@vger.kernel.org Signed-off-by: Elson Serrao Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260217201130.2804550-1-elson.serrao@oss.qualcomm.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-m31-eusb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c index 95cd3175926d5..68f1ba8fec4ad 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-m31-eusb2.c @@ -83,7 +83,7 @@ static const struct m31_phy_tbl_entry m31_eusb2_setup_tbl[] = { M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG0, UTMI_PHY_CMN_CTRL_OVERRIDE_EN, 1), M31_EUSB_PHY_INIT_CFG(USB_PHY_UTMI_CTRL5, POR, 1), M31_EUSB_PHY_INIT_CFG(USB_PHY_HS_PHY_CTRL_COMMON0, PHY_ENABLE, 1), - M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG1, PLL_EN, 1), + M31_EUSB_PHY_INIT_CFG(USB_PHY_CFG1, PLL_EN, 0), M31_EUSB_PHY_INIT_CFG(USB_PHY_FSEL_SEL, FSEL_SEL, 1), }; From 0bc155c4ca4776f9de1851d54d457671d676ac57 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 30 Apr 2026 12:31:34 -0400 Subject: [PATCH 256/554] wifi: mt76: mt792x: describe USB WFSYS reset 
with a descriptor [ Upstream commit e6f48512c1ceebcd1ce6bb83df3b3d56a261507d ] Prepare mt792xu_wfsys_reset() for chips that share the same USB WFSYS reset flow but use different register definitions. This is a pure refactor of the current mt7921u path and keeps the reset sequence unchanged. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260311002825.15502-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau Stable-dep-of: 56154fef47d1 ("wifi: mt76: mt792x: fix mt7925u USB WFSYS reset handling") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/mediatek/mt76/mt792x_usb.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index 76272a03b22e5..7b1bee3c6605d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -206,6 +206,24 @@ static void mt792xu_epctl_rst_opt(struct mt792x_dev *dev, bool reset) mt792xu_uhw_wr(&dev->mt76, MT_SSUSB_EPCTL_CSR_EP_RST_OPT, val); } +struct mt792xu_wfsys_desc { + u32 rst_reg; + u32 done_reg; + u32 done_mask; + u32 done_val; + u32 delay_ms; + bool need_status_sel; +}; + +static const struct mt792xu_wfsys_desc mt7921_wfsys_desc = { + .rst_reg = MT_CBTOP_RGU_WF_SUBSYS_RST, + .done_reg = MT_UDMA_CONN_INFRA_STATUS, + .done_mask = MT_UDMA_CONN_WFSYS_INIT_DONE, + .done_val = MT_UDMA_CONN_WFSYS_INIT_DONE, + .delay_ms = 0, + .need_status_sel = true, +}; + int mt792xu_dma_init(struct mt792x_dev *dev, bool resume) { int err; @@ -236,25 +254,31 @@ EXPORT_SYMBOL_GPL(mt792xu_dma_init); int mt792xu_wfsys_reset(struct mt792x_dev *dev) { + const struct mt792xu_wfsys_desc *desc = &mt7921_wfsys_desc; u32 val; int i; mt792xu_epctl_rst_opt(dev, false); - val = mt792xu_uhw_rr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST); + val = mt792xu_uhw_rr(&dev->mt76, desc->rst_reg); val |= MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH; - 
mt792xu_uhw_wr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST, val); + mt792xu_uhw_wr(&dev->mt76, desc->rst_reg, val); - usleep_range(10, 20); + if (desc->delay_ms) + msleep(desc->delay_ms); + else + usleep_range(10, 20); - val = mt792xu_uhw_rr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST); + val = mt792xu_uhw_rr(&dev->mt76, desc->rst_reg); val &= ~MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH; - mt792xu_uhw_wr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST, val); + mt792xu_uhw_wr(&dev->mt76, desc->rst_reg, val); + + if (desc->need_status_sel) + mt792xu_uhw_wr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS_SEL, 0); - mt792xu_uhw_wr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS_SEL, 0); for (i = 0; i < MT792x_WFSYS_INIT_RETRY_COUNT; i++) { - val = mt792xu_uhw_rr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS); - if (val & MT_UDMA_CONN_WFSYS_INIT_DONE) + val = mt792xu_uhw_rr(&dev->mt76, desc->done_reg); + if ((val & desc->done_mask) == desc->done_val) break; msleep(100); From 7d7863018f40df0c6d32714a1723ae0c119abba0 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 30 Apr 2026 12:31:35 -0400 Subject: [PATCH 257/554] wifi: mt76: mt792x: fix mt7925u USB WFSYS reset handling [ Upstream commit 56154fef47d104effa9f29ed3db4f805cbc0d640 ] mt7925u uses different reset/status registers from mt7921u. Reusing the mt7921u register set causes the WFSYS reset to fail. Add a chip-specific descriptor in mt792xu_wfsys_reset() to select the correct registers and fix mt7925u failing to initialize after a warm reboot. 
Fixes: d28e1a48952e ("wifi: mt76: mt792x: introduce mt792x-usb module") Cc: stable@vger.kernel.org Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260311002825.15502-2-sean.wang@kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt792x_regs.h | 4 ++++ drivers/net/wireless/mediatek/mt76/mt792x_usb.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h index 458cfd0260b13..b0c6dfa55cc68 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h @@ -390,6 +390,10 @@ #define MT_CBTOP_RGU_WF_SUBSYS_RST MT_CBTOP_RGU(0x600) #define MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH BIT(0) +#define MT7925_CBTOP_RGU_WF_SUBSYS_RST 0x70028600 +#define MT7925_WFSYS_INIT_DONE_ADDR 0x184c1604 +#define MT7925_WFSYS_INIT_DONE 0x00001d1e + #define MT_HW_BOUND 0x70010020 #define MT_HW_CHIPID 0x70010200 #define MT_HW_REV 0x70010204 diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index 7b1bee3c6605d..98d1d14342cd0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -224,6 +224,15 @@ static const struct mt792xu_wfsys_desc mt7921_wfsys_desc = { .need_status_sel = true, }; +static const struct mt792xu_wfsys_desc mt7925_wfsys_desc = { + .rst_reg = MT7925_CBTOP_RGU_WF_SUBSYS_RST, + .done_reg = MT7925_WFSYS_INIT_DONE_ADDR, + .done_mask = U32_MAX, + .done_val = MT7925_WFSYS_INIT_DONE, + .delay_ms = 20, + .need_status_sel = false, +}; + int mt792xu_dma_init(struct mt792x_dev *dev, bool resume) { int err; @@ -254,7 +263,9 @@ EXPORT_SYMBOL_GPL(mt792xu_dma_init); int mt792xu_wfsys_reset(struct mt792x_dev *dev) { - const struct mt792xu_wfsys_desc *desc = &mt7921_wfsys_desc; + const struct 
mt792xu_wfsys_desc *desc = is_mt7925(&dev->mt76) ? + &mt7925_wfsys_desc : + &mt7921_wfsys_desc; u32 val; int i; From 7ba734027b2beeaa24a6f76614004a4126cccf74 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 1 May 2026 11:42:03 -0400 Subject: [PATCH 258/554] media: rc: ttusbir: respect DMA coherency rules [ Upstream commit 50acaad3d202c064779db8dc3d010007347f59c7 ] Buffers must not share a cache line with other data structures. Allocate separately. Fixes: 0938069fa0897 ("[media] rc: Add support for the TechnoTrend USB IR Receiver") Cc: stable@vger.kernel.org Signed-off-by: Oliver Neukum Signed-off-by: Sean Young Signed-off-by: Hans Verkuil [ kept kzalloc(sizeof(*tt), GFP_KERNEL) instead of kzalloc_obj() ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ttusbir.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/media/rc/ttusbir.c b/drivers/media/rc/ttusbir.c index 560a26f3965cf..dde446a95eaa9 100644 --- a/drivers/media/rc/ttusbir.c +++ b/drivers/media/rc/ttusbir.c @@ -32,7 +32,7 @@ struct ttusbir { struct led_classdev led; struct urb *bulk_urb; - uint8_t bulk_buffer[5]; + u8 *bulk_buffer; int bulk_out_endp, iso_in_endp; bool led_on, is_led_on; atomic_t led_complete; @@ -186,13 +186,16 @@ static int ttusbir_probe(struct usb_interface *intf, struct rc_dev *rc; int i, j, ret; int altsetting = -1; + u8 *buffer; tt = kzalloc(sizeof(*tt), GFP_KERNEL); + buffer = kzalloc(5, GFP_KERNEL); rc = rc_allocate_device(RC_DRIVER_IR_RAW); - if (!tt || !rc) { + if (!tt || !rc || !buffer) { ret = -ENOMEM; goto out; } + tt->bulk_buffer = buffer; /* find the correct alt setting */ for (i = 0; i < intf->num_altsetting && altsetting == -1; i++) { @@ -281,8 +284,8 @@ static int ttusbir_probe(struct usb_interface *intf, tt->bulk_buffer[3] = 0x01; usb_fill_bulk_urb(tt->bulk_urb, tt->udev, usb_sndbulkpipe(tt->udev, - tt->bulk_out_endp), tt->bulk_buffer, sizeof(tt->bulk_buffer), - ttusbir_bulk_complete, tt); + 
tt->bulk_out_endp), tt->bulk_buffer, 5, + ttusbir_bulk_complete, tt); tt->led.name = "ttusbir:green:power"; tt->led.default_trigger = "rc-feedback"; @@ -351,6 +354,7 @@ static int ttusbir_probe(struct usb_interface *intf, kfree(tt); } rc_free_device(rc); + kfree(buffer); return ret; } @@ -373,6 +377,7 @@ static void ttusbir_disconnect(struct usb_interface *intf) } usb_kill_urb(tt->bulk_urb); usb_free_urb(tt->bulk_urb); + kfree(tt->bulk_buffer); usb_set_intfdata(intf, NULL); kfree(tt); } From 0adac0ee2c42027d80bac02ea9b576a88f8955d3 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 1 May 2026 15:12:22 -0400 Subject: [PATCH 259/554] media: rc: igorplugusb: heed coherency rules [ Upstream commit eac69475b01fe1e861dfe3960b57fa95671c132e ] In a control request, the USB request structure can be subject to DMA on some HCs. Hence it must obey the rules for DMA coherency. Allocate it separately. Fixes: b1c97193c6437 ("[media] rc: port IgorPlug-USB to rc-core") Cc: stable@vger.kernel.org Signed-off-by: Oliver Neukum Signed-off-by: Sean Young Signed-off-by: Hans Verkuil [ replaced kzalloc_obj(*ir->request, GFP_KERNEL) with kzalloc(sizeof(*ir->request), GFP_KERNEL) ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/igorplugusb.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c index e034c93d57cf0..e7e31776453c1 100644 --- a/drivers/media/rc/igorplugusb.c +++ b/drivers/media/rc/igorplugusb.c @@ -34,7 +34,7 @@ struct igorplugusb { struct device *dev; struct urb *urb; - struct usb_ctrlrequest request; + struct usb_ctrlrequest *request; struct timer_list timer; @@ -122,7 +122,7 @@ static void igorplugusb_cmd(struct igorplugusb *ir, int cmd) { int ret; - ir->request.bRequest = cmd; + ir->request->bRequest = cmd; ir->urb->transfer_flags = 0; ret = usb_submit_urb(ir->urb, GFP_ATOMIC); if (ret && ret != -EPERM) @@ -164,13 +164,17 @@ static int 
igorplugusb_probe(struct usb_interface *intf, if (!ir) return -ENOMEM; + ir->request = kzalloc(sizeof(*ir->request), GFP_KERNEL); + if (!ir->request) + goto fail; + ir->dev = &intf->dev; timer_setup(&ir->timer, igorplugusb_timer, 0); - ir->request.bRequest = GET_INFRACODE; - ir->request.bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; - ir->request.wLength = cpu_to_le16(MAX_PACKET); + ir->request->bRequest = GET_INFRACODE; + ir->request->bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; + ir->request->wLength = cpu_to_le16(MAX_PACKET); ir->urb = usb_alloc_urb(0, GFP_KERNEL); if (!ir->urb) @@ -228,6 +232,7 @@ static int igorplugusb_probe(struct usb_interface *intf, usb_free_urb(ir->urb); rc_free_device(ir->rc); kfree(ir->buf_in); + kfree(ir->request); return ret; } @@ -243,6 +248,7 @@ static void igorplugusb_disconnect(struct usb_interface *intf) usb_unpoison_urb(ir->urb); usb_free_urb(ir->urb); kfree(ir->buf_in); + kfree(ir->request); } static const struct usb_device_id igorplugusb_table[] = { From c772e9efe2b1f1b1dcd481474e36eae42a42176d Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Sun, 3 May 2026 00:24:54 -0400 Subject: [PATCH 260/554] iio: frequency: admv1013: add dev variable [ Upstream commit e61b5bb0e91390adee41eaddc0a1a7d55d5652b2 ] Introduce a local struct device pointer in functions that reference &spi->dev for device-managed resource calls and device property reads, improving code readability. 
Signed-off-by: Antoniu Miclaus Reviewed-by: Andy Shevchenko Signed-off-by: Jonathan Cameron Stable-dep-of: aac0a51b1670 ("iio: frequency: admv1013: fix NULL pointer dereference on str") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/admv1013.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index d8e8d541990f8..d29e288da011a 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -518,11 +518,11 @@ static int admv1013_properties_parse(struct admv1013_state *st) { int ret; const char *str; - struct spi_device *spi = st->spi; + struct device *dev = &st->spi->dev; - st->det_en = device_property_read_bool(&spi->dev, "adi,detector-enable"); + st->det_en = device_property_read_bool(dev, "adi,detector-enable"); - ret = device_property_read_string(&spi->dev, "adi,input-mode", &str); + ret = device_property_read_string(dev, "adi,input-mode", &str); if (ret) st->input_mode = ADMV1013_IQ_MODE; @@ -533,7 +533,7 @@ static int admv1013_properties_parse(struct admv1013_state *st) else return -EINVAL; - ret = device_property_read_string(&spi->dev, "adi,quad-se-mode", &str); + ret = device_property_read_string(dev, "adi,quad-se-mode", &str); if (ret) st->quad_se_mode = ADMV1013_SE_MODE_DIFF; @@ -546,11 +546,11 @@ static int admv1013_properties_parse(struct admv1013_state *st) else return -EINVAL; - ret = devm_regulator_bulk_get_enable(&st->spi->dev, + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(admv1013_vcc_regs), admv1013_vcc_regs); if (ret) { - dev_err_probe(&spi->dev, ret, + dev_err_probe(dev, ret, "Failed to request VCC regulators\n"); return ret; } @@ -562,9 +562,10 @@ static int admv1013_probe(struct spi_device *spi) { struct iio_dev *indio_dev; struct admv1013_state *st; + struct device *dev = &spi->dev; int ret, vcm_uv; - indio_dev = devm_iio_device_alloc(&spi->dev, 
sizeof(*st)); + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); if (!indio_dev) return -ENOMEM; @@ -581,20 +582,20 @@ static int admv1013_probe(struct spi_device *spi) if (ret) return ret; - ret = devm_regulator_get_enable_read_voltage(&spi->dev, "vcm"); + ret = devm_regulator_get_enable_read_voltage(dev, "vcm"); if (ret < 0) - return dev_err_probe(&spi->dev, ret, + return dev_err_probe(dev, ret, "failed to get the common-mode voltage\n"); vcm_uv = ret; - st->clkin = devm_clk_get_enabled(&spi->dev, "lo_in"); + st->clkin = devm_clk_get_enabled(dev, "lo_in"); if (IS_ERR(st->clkin)) - return dev_err_probe(&spi->dev, PTR_ERR(st->clkin), + return dev_err_probe(dev, PTR_ERR(st->clkin), "failed to get the LO input clock\n"); st->nb.notifier_call = admv1013_freq_change; - ret = devm_clk_notifier_register(&spi->dev, st->clkin, &st->nb); + ret = devm_clk_notifier_register(dev, st->clkin, &st->nb); if (ret) return ret; @@ -606,11 +607,11 @@ static int admv1013_probe(struct spi_device *spi) return ret; } - ret = devm_add_action_or_reset(&spi->dev, admv1013_powerdown, st); + ret = devm_add_action_or_reset(dev, admv1013_powerdown, st); if (ret) return ret; - return devm_iio_device_register(&spi->dev, indio_dev); + return devm_iio_device_register(dev, indio_dev); } static const struct spi_device_id admv1013_id[] = { From 5e9f1bad26df3d3afb3cbbfa408b6d6e809708ac Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Sun, 3 May 2026 00:24:55 -0400 Subject: [PATCH 261/554] iio: frequency: admv1013: fix NULL pointer dereference on str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aac0a51b16700b403a55b67ba495de021db78763 ] When device_property_read_string() fails, str is left uninitialized but the code falls through to strcmp(str, ...), dereferencing a garbage pointer. 
Replace manual read/strcmp with device_property_match_property_string() and consolidate the SE mode enums into a single sequential enum, mapping to hardware register values via a switch consistent with other bitfields in the driver. Several cleanup patches have been applied to this driver recently so this will need a manual backport. Fixes: da35a7b526d9 ("iio: frequency: admv1013: add support for ADMV1013") Reviewed-by: Nuno Sá Signed-off-by: Antoniu Miclaus Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/frequency/admv1013.c | 65 ++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index d29e288da011a..5cea2c9887905 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -85,9 +85,9 @@ enum { }; enum { - ADMV1013_SE_MODE_POS = 6, - ADMV1013_SE_MODE_NEG = 9, - ADMV1013_SE_MODE_DIFF = 12 + ADMV1013_SE_MODE_POS, + ADMV1013_SE_MODE_NEG, + ADMV1013_SE_MODE_DIFF, }; struct admv1013_state { @@ -470,10 +470,23 @@ static int admv1013_init(struct admv1013_state *st, int vcm_uv) if (ret) return ret; - data = FIELD_PREP(ADMV1013_QUAD_SE_MODE_MSK, st->quad_se_mode); + switch (st->quad_se_mode) { + case ADMV1013_SE_MODE_POS: + data = 6; + break; + case ADMV1013_SE_MODE_NEG: + data = 9; + break; + case ADMV1013_SE_MODE_DIFF: + data = 12; + break; + default: + return -EINVAL; + } ret = __admv1013_spi_update_bits(st, ADMV1013_REG_QUAD, - ADMV1013_QUAD_SE_MODE_MSK, data); + ADMV1013_QUAD_SE_MODE_MSK, + FIELD_PREP(ADMV1013_QUAD_SE_MODE_MSK, data)); if (ret) return ret; @@ -514,37 +527,33 @@ static void admv1013_powerdown(void *data) admv1013_spi_update_bits(data, ADMV1013_REG_ENABLE, enable_reg_msk, enable_reg); } +static const char * const admv1013_input_modes[] = { + [ADMV1013_IQ_MODE] = "iq", + [ADMV1013_IF_MODE] = "if", +}; + +static const char * 
const admv1013_quad_se_modes[] = { + [ADMV1013_SE_MODE_POS] = "se-pos", + [ADMV1013_SE_MODE_NEG] = "se-neg", + [ADMV1013_SE_MODE_DIFF] = "diff", +}; + static int admv1013_properties_parse(struct admv1013_state *st) { int ret; - const char *str; struct device *dev = &st->spi->dev; st->det_en = device_property_read_bool(dev, "adi,detector-enable"); - ret = device_property_read_string(dev, "adi,input-mode", &str); - if (ret) - st->input_mode = ADMV1013_IQ_MODE; - - if (!strcmp(str, "iq")) - st->input_mode = ADMV1013_IQ_MODE; - else if (!strcmp(str, "if")) - st->input_mode = ADMV1013_IF_MODE; - else - return -EINVAL; + ret = device_property_match_property_string(dev, "adi,input-mode", + admv1013_input_modes, + ARRAY_SIZE(admv1013_input_modes)); + st->input_mode = ret >= 0 ? ret : ADMV1013_IQ_MODE; - ret = device_property_read_string(dev, "adi,quad-se-mode", &str); - if (ret) - st->quad_se_mode = ADMV1013_SE_MODE_DIFF; - - if (!strcmp(str, "diff")) - st->quad_se_mode = ADMV1013_SE_MODE_DIFF; - else if (!strcmp(str, "se-pos")) - st->quad_se_mode = ADMV1013_SE_MODE_POS; - else if (!strcmp(str, "se-neg")) - st->quad_se_mode = ADMV1013_SE_MODE_NEG; - else - return -EINVAL; + ret = device_property_match_property_string(dev, "adi,quad-se-mode", + admv1013_quad_se_modes, + ARRAY_SIZE(admv1013_quad_se_modes)); + st->quad_se_mode = ret >= 0 ? ret : ADMV1013_SE_MODE_DIFF; ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(admv1013_vcc_regs), From 35fb4a0c077c5d1049c2628b769e0a1b1e65df0d Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sun, 3 May 2026 13:54:25 -0400 Subject: [PATCH 262/554] net: qrtr: ns: Limit the maximum server registration per node [ Upstream commit d5ee2ff98322337951c56398e79d51815acbf955 ] Current code does no bound checking on the number of servers added per node. A malicious client can flood NEW_SERVER messages and exhaust memory. Fix this issue by limiting the maximum number of server registrations to 256 per node. 
If the NEW_SERVER message is received for an old port, then don't restrict it as it will get replaced. While at it, also rate limit the error messages in the failure path of qrtr_ns_worker(). Note that the limit of 256 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reported-by: Yiming Qian Reviewed-by: Simon Horman Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-1-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 2018a71a5c4c1..8a5352f6cf8b6 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -68,8 +68,14 @@ struct qrtr_server { struct qrtr_node { unsigned int id; struct xarray servers; + u32 server_count; }; +/* Max server limit is chosen based on the current platform requirements. If the + * requirement changes in the future, this value can be increased. 
+ */ +#define QRTR_NS_MAX_SERVERS 256 + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -230,6 +236,17 @@ static struct qrtr_server *server_add(unsigned int service, if (!service || !port) return NULL; + node = node_get(node_id); + if (!node) + return NULL; + + /* Make sure the new servers per port are capped at the maximum value */ + old = xa_load(&node->servers, port); + if (!old && node->server_count >= QRTR_NS_MAX_SERVERS) { + pr_err_ratelimited("QRTR client node %u exceeds max server limit!\n", node_id); + return NULL; + } + srv = kzalloc(sizeof(*srv), GFP_KERNEL); if (!srv) return NULL; @@ -239,10 +256,6 @@ static struct qrtr_server *server_add(unsigned int service, srv->node = node_id; srv->port = port; - node = node_get(node_id); - if (!node) - goto err; - /* Delete the old server on the same port */ old = xa_store(&node->servers, port, srv, GFP_KERNEL); if (old) { @@ -253,6 +266,8 @@ static struct qrtr_server *server_add(unsigned int service, } else { kfree(old); } + } else { + node->server_count++; } trace_qrtr_ns_server_add(srv->service, srv->instance, @@ -293,6 +308,7 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) } kfree(srv); + node->server_count--; return 0; } @@ -681,7 +697,7 @@ static void qrtr_ns_worker(struct work_struct *work) } if (ret < 0) - pr_err("failed while handling packet from %d:%d", + pr_err_ratelimited("failed while handling packet from %d:%d", sq.sq_node, sq.sq_port); } From 20855cef7e659ef84ac73251256fa530819b2346 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sun, 3 May 2026 13:55:04 -0400 Subject: [PATCH 263/554] net: qrtr: ns: Limit the maximum number of lookups [ Upstream commit 5640227d9a21c6a8be249a10677b832e7f40dc55 ] Current code does no bound checking on the number of lookups a client can perform. 
Though the code restricts the lookups to local clients, there is still a possibility of a malicious local client sending a flood of NEW_LOOKUP messages over the same socket. Fix this issue by limiting the maximum number of lookups to 64 globally. Since the nameserver allows only atmost one local observer, this global lookup count will ensure that the lookups stay within the limit. Note that, limit of 64 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-2-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski [ adapted comment block to only mention QRTR_NS_MAX_LOOKUPS and kept kzalloc() instead of kzalloc_obj() due to missing prerequisite commits ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 8a5352f6cf8b6..b6da0e7d72a5d 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -22,6 +22,7 @@ static struct { struct socket *sock; struct sockaddr_qrtr bcast_sq; struct list_head lookups; + u32 lookup_count; struct workqueue_struct *workqueue; struct work_struct work; void (*saved_data_ready)(struct sock *sk); @@ -76,6 +77,11 @@ struct qrtr_node { */ #define QRTR_NS_MAX_SERVERS 256 +/* Max lookup limit is chosen based on the current platform requirements. If the + * requirement changes in the future, this value can be increased. 
+ */ +#define QRTR_NS_MAX_LOOKUPS 64 + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -444,6 +450,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } /* Remove the server belonging to this port but don't broadcast @@ -561,6 +568,11 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, if (from->sq_node != qrtr_ns.local_node) return -EINVAL; + if (qrtr_ns.lookup_count >= QRTR_NS_MAX_LOOKUPS) { + pr_err_ratelimited("QRTR client node exceeds max lookup limit!\n"); + return -ENOSPC; + } + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); if (!lookup) return -ENOMEM; @@ -569,6 +581,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, lookup->service = service; lookup->instance = instance; list_add_tail(&lookup->li, &qrtr_ns.lookups); + qrtr_ns.lookup_count++; memset(&filter, 0, sizeof(filter)); filter.service = service; @@ -609,6 +622,7 @@ static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } } From 5cf6d5e5e3b804a44692fbf548a5179442e2e923 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 4 May 2026 03:40:08 -0400 Subject: [PATCH 264/554] net: qrtr: ns: Limit the total number of nodes [ Upstream commit 27d5e84e810b0849d08b9aec68e48570461ce313 ] Currently, the nameserver doesn't limit the number of nodes it handles. This can be an attack vector if a malicious client starts registering random nodes, leading to memory exhaustion. Hence, limit the maximum number of nodes to 64. Note that, limit of 64 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. 
Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-4-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski [ dropped comment/define changes for missing QRTR_NS_MAX_SERVERS/LOOKUPS prereqs and kept plain kzalloc instead of kzalloc_obj ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index b6da0e7d72a5d..a10e8de4f7e10 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -82,6 +82,13 @@ struct qrtr_node { */ #define QRTR_NS_MAX_LOOKUPS 64 +/* Max nodes limit is chosen based on the current platform requirements. + * If the requirement changes in the future, this value can be increased. + */ +#define QRTR_NS_MAX_NODES 64 + +static u8 node_count; + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -90,6 +97,11 @@ static struct qrtr_node *node_get(unsigned int node_id) if (node) return node; + if (node_count >= QRTR_NS_MAX_NODES) { + pr_err_ratelimited("QRTR clients exceed max node limit!\n"); + return NULL; + } + /* If node didn't exist, allocate and insert it to the tree */ node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) @@ -103,6 +115,8 @@ static struct qrtr_node *node_get(unsigned int node_id) return NULL; } + node_count++; + return node; } @@ -409,6 +423,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) delete_node: xa_erase(&nodes, from->sq_node); kfree(node); + node_count--; return ret; } From b42821c15445f93daea3e76ada682b2b7181c476 Mon Sep 17 00:00:00 2001 From: Yucheng Lu Date: Wed, 22 Apr 2026 21:45:04 +0800 Subject: [PATCH 265/554] crypto: authencesn - reject short ahash digests during instance creation commit 5db6ef9847717329f12c5ea8aba7e9f588a980c0 upstream. 
authencesn requires either a zero authsize or an authsize of at least 4 bytes because the ESN encrypt/decrypt paths always move 4 bytes of high-order sequence number data at the end of the authenticated data. While crypto_authenc_esn_setauthsize() already rejects explicit non-zero authsizes in the range 1..3, crypto_authenc_esn_create() still copied auth->digestsize into inst->alg.maxauthsize without validating it. The AEAD core then initialized the tfm's default authsize from that value. As a result, selecting an ahash with digest size 1..3, such as cbcmac(cipher_null), exposed authencesn instances whose default authsize was invalid even though setauthsize() would have rejected the same value. AF_ALG could then trigger the ESN tail handling with a too-short tag and hit an out-of-bounds access. Reject authencesn instances whose ahash digest size is in the invalid non-zero range 1..3 so that no tfm can inherit an unsupported default authsize. Fixes: f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Yuhang Zheng Reviewed-by: Eric Biggers Signed-off-by: Yucheng Lu Signed-off-by: Ren Wei Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/authencesn.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index af3d584e584fb..522df41365d8f 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -390,6 +390,11 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, auth = crypto_spawn_ahash_alg(&ctx->auth); auth_base = &auth->base; + if (auth->digestsize > 0 && auth->digestsize < 4) { + err = -EINVAL; + goto err_free_inst; + } + err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) From b8c5acce56e07eb6654d0ff0427945875d9179f8 Mon Sep 17 00:00:00 2001 From: Douglas 
Anderson Date: Mon, 13 Apr 2026 19:59:11 -0700 Subject: [PATCH 266/554] driver core: Add kernel-doc for DEV_FLAG_COUNT enum value commit 5b484311507b5d403c1f7a45f6aa3778549e268b upstream. Even though nobody should use this value (except when declaring the "flags" bitmap), kernel-doc still gets upset that it's not documented. It reports: WARNING: ../include/linux/device.h:519 Enum value 'DEV_FLAG_COUNT' not described in enum 'struct_device_flags' Add the description of DEV_FLAG_COUNT. Fixes: a2225b6e834a ("driver core: Don't let a device probe until it's ready") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/f318cd43-81fd-48b9-abf7-92af85f12f91@infradead.org Signed-off-by: Douglas Anderson Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Link: https://patch.msgid.link/20260413195910.1.I23aca74fe2d3636a47df196a80920fecb2643220@changeid Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index f5d5da4dfab1d..84910bffe7953 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -485,6 +485,7 @@ struct device_physical_location { * * @DEV_FLAG_READY_TO_PROBE: If set then device_add() has finished enough * initialization that probe could be called. + * @DEV_FLAG_COUNT: Number of defined struct_device_flags. */ enum struct_device_flags { DEV_FLAG_READY_TO_PROBE = 0, From 1d160e30aa42b7c41163e51366bb34432367260d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 14:37:53 +0200 Subject: [PATCH 267/554] ALSA: caiaq: Fix potentially leftover ep1_in_urb at error path commit 0a7b5221b5b51cc798fcfc3be00d02eade149d69 upstream. The previous fix for handling the error from setup_card() missed that an internal URB cdev->ep1_in_urb might have been already submitted beforehand. 
In the normal case, this URB gets killed at the disconnection, but in the error path, we didn't do it, hence there can be a potential leak. Fix it in the error path for setup_card(), too. Fixes: 28abd224db4a ("ALSA: caiaq: Handle probe errors properly") Cc: Link: https://patch.msgid.link/20260427123819.890185-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 8af0c04041ee3..e6fe82e4e3d75 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -514,7 +514,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) card->private_free = card_free; err = setup_card(cdev); if (err < 0) - return err; + goto err_kill_urb; return 0; From 3d46009e2ad467cfd2eda23165f92d9e3c1e8480 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 16:56:15 +0200 Subject: [PATCH 268/554] ALSA: caiaq: Don't abort when no input device is available commit b32ae47a2b0a1fb4bd4942242847966d9b178222 upstream. The previous fix to handle the error from setup_card() caused a regression for the models that have no dedicated input device; snd_usb_caiaq_input_init() just returns -EINVAL, and we treat it as a fatal error although it should be ignored. As a regression fix, change the error code to -ENODEV, and ignore this error in the callee, to continue probing. 
Fixes: 28abd224db4a ("ALSA: caiaq: Handle probe errors properly") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=221423 Link: https://patch.msgid.link/20260427145642.6637-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/device.c | 2 +- sound/usb/caiaq/input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index e6fe82e4e3d75..e78980ab17567 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -366,7 +366,7 @@ static int setup_card(struct snd_usb_caiaqdev *cdev) #ifdef CONFIG_SND_USB_CAIAQ_INPUT ret = snd_usb_caiaq_input_init(cdev); - if (ret < 0) { + if (ret < 0 && ret != -ENODEV) { dev_err(dev, "Unable to set up input system (ret=%d)\n", ret); return ret; } diff --git a/sound/usb/caiaq/input.c b/sound/usb/caiaq/input.c index a9130891bb696..5c70fdf61cc13 100644 --- a/sound/usb/caiaq/input.c +++ b/sound/usb/caiaq/input.c @@ -804,7 +804,7 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *cdev) default: /* no input methods supported on this device */ - ret = -EINVAL; + ret = -ENODEV; goto exit_free_idev; } From c261d07a80576dc8ccf394ef8f074f8c67a06b37 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Apr 2026 15:16:33 +0200 Subject: [PATCH 269/554] ipv6: rpl: reserve mac_len headroom when recompressed SRH grows commit 9e6bf146b55999a095bb14f73a843942456d1adc upstream. ipv6_rpl_srh_rcv() decompresses an RFC 6554 Source Routing Header, swaps the next segment into ipv6_hdr->daddr, recompresses, then pulls the old header and pushes the new one plus the IPv6 header back. The recompressed header can be larger than the received one when the swap reduces the common-prefix length the segments share with daddr (CmprI=0, CmprE>0, seg[0][0] != daddr[0] gives the maximum +8 bytes). pskb_expand_head() was gated on segments_left == 0, so on earlier segments the push consumed unchecked headroom. 
Once skb_push() leaves fewer than skb->mac_len bytes in front of data, skb_mac_header_rebuild()'s call to: skb_set_mac_header(skb, -skb->mac_len); will store (data - head) - mac_len into the u16 mac_header field, which wraps to ~65530, and the following memmove() writes mac_len bytes ~64KiB past skb->head. A single AF_INET6/SOCK_RAW/IPV6_HDRINCL packet over lo with a two segment type-3 SRH (CmprI=0, CmprE=15) reaches headroom 8 after one pass; KASAN reports a 14-byte OOB write in ipv6_rthdr_rcv. Fix this by expanding the head whenever the remaining room is less than the push size plus mac_len, and request that much extra so the rebuilt MAC header fits afterwards. Fixes: 8610c7c6e3bd ("net: ipv6: add support for rpl sr exthdr") Cc: stable Reported-by: Anthropic Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026042133-gout-unvented-1bd9@gregkh Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1d509b6d16bbd..ec03bcff6b65e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -491,6 +491,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; + unsigned int chdr_len; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -592,8 +593,10 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); - if (unlikely(!hdr->segments_left)) { - if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + chdr_len = sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3); + if (unlikely(!hdr->segments_left || + skb_headroom(skb) < chdr_len + skb->mac_len)) { + if (pskb_expand_head(skb, chdr_len + skb->mac_len, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 
IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -603,7 +606,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) oldhdr = ipv6_hdr(skb); } - skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); + skb_push(skb, chdr_len); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); From 30c000a49094ec568c9b51b7421f7a4a3f0b0298 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 20 Apr 2026 14:57:15 -0700 Subject: [PATCH 270/554] drm/amdgpu: fix zero-size GDS range init on RDNA4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 095a8b0ad3c3b5cdc3850d961adb8a8f735220bb upstream. RDNA4 (GFX 12) hardware removes the GDS, GWS, and OA on-chip memory resources. The gfx_v12_0 initialisation code correctly leaves adev->gds.gds_size, adev->gds.gws_size, and adev->gds.oa_size at zero to reflect this. amdgpu_ttm_init() unconditionally calls amdgpu_ttm_init_on_chip() for each of these resources regardless of size. When the size is zero, amdgpu_ttm_init_on_chip() forwards the call to ttm_range_man_init(), which calls drm_mm_init(mm, 0, 0). drm_mm_init() immediately fires DRM_MM_BUG_ON(start + size <= start) -- trivially true when size is zero -- crashing the kernel during modprobe of amdgpu on an RX 9070 XT. Guard against this by returning 0 early from amdgpu_ttm_init_on_chip() when size_in_page is zero. This skips TTM resource manager registration for hardware resources that are absent, without affecting any other GPU type. DRM_MM_BUG_ON() only asserts if CONFIG_DRM_DEBUG_MM is enabled in the kernel config. This is apparently rarely enabled as these chips have been in the market for over a year and this issue was only reported now. 
Link: https://lore.kernel.org/all/bug-221376-2300@https.bugzilla.kernel.org%2F/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=221376 Oops-Analysis: http://oops.fenrus.org/reports/bugzilla.korg/221376/report.html Assisted-by: GitHub Copilot:Claude Sonnet 4.6 linux-kernel-oops-x86. Signed-off-by: Arjan van de Ven Cc: Alex Deucher Cc: "Christian König" Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Alex Deucher (cherry picked from commit 5719ce5865279cad4fd5f01011fe037168503f2d) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4183e5301cffc..d629c5f73bf59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -75,6 +75,9 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, uint64_t size_in_page) { + if (!size_in_page) + return 0; + return ttm_range_man_init(&adev->mman.bdev, type, false, size_in_page); } From 6153878c5255bb69b7d0868105ca078ef13cbcf8 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sun, 26 Apr 2026 05:49:34 +0530 Subject: [PATCH 271/554] ALSA: caiaq: fix usb_dev refcount leak on probe failure commit 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b upstream. create_card() takes a reference on the USB device with usb_get_dev() and stores the matching usb_put_dev() in card_free(), which is installed as the snd_card's ->private_free destructor. However, ->private_free is only assigned near the end of init_card(), after several failure points (usb_set_interface(), EP type checks, usb_submit_urb(), the EP1_CMD_GET_DEVICE_INFO exchange, and its timeout). When any of those fail, init_card() returns an error to snd_probe(), which calls snd_card_free(card). 
Because ->private_free is still NULL, card_free() never runs, the usb_get_dev() reference is not dropped, and the struct usb_device leaks along with its descriptor allocations and device_private. syzbot reproduces this with a malformed UAC3 device whose only valid altsetting is 0; init_card()'s usb_set_interface(usb_dev, 0, 1) call fails with -EIO and triggers the leak. Move the ->private_free assignment into create_card(), immediately after usb_get_dev(), so that every error path reaching snd_card_free() balances the reference. card_free()'s callees (snd_usb_caiaq_input_free, free_urbs, kfree) already tolerate the partially-initialized state because the chip private area is zero-initialized by snd_card_new(). Fixes: 80bb50e2d459 ("ALSA: caiaq: take a reference on the USB device in create_card()") Reported-by: syzbot+2afd7e71155c7e241560@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2afd7e71155c7e241560 Tested-by: syzbot+2afd7e71155c7e241560@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260426001934.70813-1-kartikey406@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index e78980ab17567..b20aae0caf60a 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -423,6 +423,7 @@ static int create_card(struct usb_device *usb_dev, cdev = caiaqdev(card); cdev->chip.dev = usb_get_dev(usb_dev); + card->private_free = card_free; cdev->chip.card = card; cdev->chip.usb_id = USB_ID(le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct)); @@ -511,7 +512,6 @@ static int init_card(struct snd_usb_caiaqdev *cdev) scnprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); - card->private_free = card_free; err = 
setup_card(cdev); if (err < 0) goto err_kill_urb; From bdd9503c3d222d2735b56c7a8b4422ccf3de6e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Fri, 24 Apr 2026 09:48:41 -0300 Subject: [PATCH 272/554] ALSA: aloop: Fix peer runtime UAF during format-change stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e5c33cdc6f402eab8abd36ecf436b22c9d3a8aff upstream. loopback_check_format() may stop the capture side when playback starts with parameters that no longer match a running capture stream. Commit 826af7fa62e3 ("ALSA: aloop: Fix racy access at PCM trigger") moved the peer lookup under cable->lock, but the actual snd_pcm_stop() still runs after dropping that lock. A concurrent close can clear the capture entry from cable->streams[] and detach or free its runtime while the playback trigger path still holds a stale peer substream pointer. Keep a per-cable count of in-flight peer stops before dropping cable->lock, and make free_cable() wait for those stops before detaching the runtime. This preserves the existing behavior while making the peer runtime lifetime explicit. 
Reported-by: syzbot+8fa95c41eafbc9d2ff6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8fa95c41eafbc9d2ff6f Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback") Cc: stable@vger.kernel.org Suggested-by: Takashi Iwai Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260424-alsa-aloop-peer-stop-uaf-v2-1-94e68101db8a@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index aa0d2fcb1a180..a37a1695f51c7 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -99,6 +99,9 @@ struct loopback_ops { struct loopback_cable { spinlock_t lock; struct loopback_pcm *streams[2]; + /* in-flight peer stops running outside cable->lock */ + atomic_t stop_count; + wait_queue_head_t stop_wait; struct snd_pcm_hardware hw; /* flags */ unsigned int valid; @@ -366,8 +369,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) return 0; if (stream == SNDRV_PCM_STREAM_CAPTURE) return -EIO; - else if (cruntime->state == SNDRV_PCM_STATE_RUNNING) + else if (cruntime->state == SNDRV_PCM_STATE_RUNNING) { + /* close must not free the peer runtime below */ + atomic_inc(&cable->stop_count); stop_capture = true; + } } setup = get_setup(dpcm_play); @@ -396,8 +402,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) } } - if (stop_capture) + if (stop_capture) { snd_pcm_stop(dpcm_capt->substream, SNDRV_PCM_STATE_DRAINING); + if (atomic_dec_and_test(&cable->stop_count)) + wake_up(&cable->stop_wait); + } return 0; } @@ -1049,23 +1058,29 @@ static void free_cable(struct snd_pcm_substream *substream) struct loopback *loopback = substream->private_data; int dev = get_cable_index(substream); struct loopback_cable *cable; + struct loopback_pcm *dpcm; + bool other_alive; cable = 
loopback->cables[substream->number][dev]; if (!cable) return; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - guard(spinlock_irq)(&cable->lock); - cable->streams[substream->stream] = NULL; - } else { - struct loopback_pcm *dpcm = substream->runtime->private_data; - if (cable->ops && cable->ops->close_cable && dpcm) - cable->ops->close_cable(dpcm); - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); + scoped_guard(spinlock_irq, &cable->lock) { + cable->streams[substream->stream] = NULL; + other_alive = cable->streams[!substream->stream]; } + + /* Pair with the stop_count increment in loopback_check_format(). */ + wait_event(cable->stop_wait, !atomic_read(&cable->stop_count)); + if (other_alive) + return; + + dpcm = substream->runtime->private_data; + if (cable->ops && cable->ops->close_cable && dpcm) + cable->ops->close_cable(dpcm); + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); } static int loopback_jiffies_timer_open(struct loopback_pcm *dpcm) @@ -1260,6 +1275,8 @@ static int loopback_open(struct snd_pcm_substream *substream) goto unlock; } spin_lock_init(&cable->lock); + atomic_set(&cable->stop_count, 0); + init_waitqueue_head(&cable->stop_wait); cable->hw = loopback_pcm_hardware; if (loopback->timer_source) cable->ops = &loopback_snd_timer_ops; From e9b057a44deff4c59c13f44672a5cc74dcd57522 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 20 Apr 2026 13:47:26 +0200 Subject: [PATCH 273/554] vmalloc: fix buffer overflow in vrealloc_node_align() commit 82d1f01292d3f09bf063f829f8ab8de12b4280a1 upstream. Commit 4c5d3365882d ("mm/vmalloc: allow to set node and align in vrealloc") added the ability to force a new allocation if the current pointer is on the wrong NUMA node, or if an alignment constraint is not met, even if the user is shrinking the allocation. 
On this path (need_realloc), the code allocates a new object of 'size' bytes and then memcpy()s 'old_size' bytes into it. If the request is to shrink the object (size < old_size), this results in an out-of-bounds write on the new buffer. Fix this by bounding the copy length by the new allocation size. Link: https://lore.kernel.org/20260420114805.3572606-2-elver@google.com Fixes: 4c5d3365882d ("mm/vmalloc: allow to set node and align in vrealloc") Signed-off-by: Marco Elver Reported-by: Harry Yoo (Oracle) Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka (SUSE) Reviewed-by: Harry Yoo (Oracle) Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c5368e171411d..021fc25268866 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4201,7 +4201,7 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align return NULL; if (p) { - memcpy(n, p, old_size); + memcpy(n, p, min(size, old_size)); vfree(p); } From 05b4ed8bef30bba4f559c8d835e2dd20c48cf8a4 Mon Sep 17 00:00:00 2001 From: "Harry Yoo (Oracle)" Date: Mon, 27 Apr 2026 16:09:52 +0900 Subject: [PATCH 274/554] mm/page_alloc: return NULL early from alloc_frozen_pages_nolock() in NMI on UP commit 620b46ed6ae17c8438d889c8c0cfddab36a1476c upstream. On UP kernels (!CONFIG_SMP), spin_trylock() is a no-op that unconditionally succeeds even when the lock is already held. As a result, alloc_frozen_pages_nolock() called from NMI context can re-enter rmqueue() and acquire the zone lock that the interrupted context is already holding, corrupting the freelists. With CONFIG_DEBUG_SPINLOCK on UP, the following BUG is triggered with the slub_kunit test module: BUG: spinlock trylock failure on UP on CPU#0, kunit_try_catch/243 [...] 
Call Trace: dump_stack_lvl+0x3f/0x60 do_raw_spin_trylock+0x41/0x50 _raw_spin_trylock+0x24/0x50 rmqueue.isra.0+0x2a9/0xa70 get_page_from_freelist+0xeb/0x450 alloc_frozen_pages_nolock_noprof+0x111/0x1e0 allocate_slab+0x42a/0x500 ___slab_alloc+0xa7/0x4c0 kmalloc_nolock_noprof+0x164/0x310 [...] Fix this by returning NULL early when invoked from NMI on a UP kernel. Link: https://lore.kernel.org/linux-mm/ad_cqe51pvr1WaDg@hyeyoo Cc: stable@vger.kernel.org Fixes: d7242af86434 ("mm: Introduce alloc_frozen_pages_nolock()") Signed-off-by: Harry Yoo (Oracle) Link: https://patch.msgid.link/20260427-nolock-api-fix-v2-1-a6b83a92d9a4@kernel.org Signed-off-by: Vlastimil Babka (SUSE) Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 43363fd3b8ea4..f676966180035 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7641,6 +7641,11 @@ struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq())) return NULL; + + /* On UP, spin_trylock() always succeeds even when it is locked */ + if (!IS_ENABLED(CONFIG_SMP) && in_nmi()) + return NULL; + if (!pcp_allowed_order(order)) return NULL; From a8d95d274be241ad21f6523bf2d6ba0d7d7e46b7 Mon Sep 17 00:00:00 2001 From: "Harry Yoo (Oracle)" Date: Mon, 27 Apr 2026 16:09:53 +0900 Subject: [PATCH 275/554] mm/slab: return NULL early from kmalloc_nolock() in NMI on UP commit 5b31044e649e3e54c2caef135c09b371c2fbcd08 upstream. On UP kernels (!CONFIG_SMP), spin_trylock() is a no-op that unconditionally succeeds even when the lock is already held. As a result, kmalloc_nolock() called from NMI context can re-enter the slab allocator and acquire n->list_lock that the interrupted context is already holding, corrupting slab state. 
With CONFIG_DEBUG_SPINLOCK on UP, the following BUG is triggered with the slub_kunit test module: BUG: spinlock trylock failure on UP on CPU#0, kunit_try_catch/243 [...] Call Trace: dump_stack_lvl+0x3f/0x60 do_raw_spin_trylock+0x41/0x50 _raw_spin_trylock+0x24/0x50 get_from_partial_node+0x120/0x4d0 ___slab_alloc+0x8a/0x4c0 kmalloc_nolock_noprof+0x164/0x310 [...] Fix this by returning NULL early when invoked from NMI on a UP kernel. Link: https://lore.kernel.org/linux-mm/ad_cqe51pvr1WaDg@hyeyoo Cc: stable@vger.kernel.org Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Signed-off-by: Harry Yoo (Oracle) Link: https://patch.msgid.link/20260427-nolock-api-fix-v2-2-a6b83a92d9a4@kernel.org Signed-off-by: Vlastimil Babka (SUSE) Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 27aa0e2975a5f..a89df6ddcc587 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5752,6 +5752,11 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node) * sleeping lock on RT. */ return NULL; + + /* On UP, spin_trylock() always succeeds even when it is locked */ + if (!IS_ENABLED(CONFIG_SMP) && in_nmi()) + return NULL; + retry: if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return NULL; From b778b6d095421619c331fd2d7751143cd5387103 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 21 Apr 2026 11:47:35 +0200 Subject: [PATCH 276/554] net: ipv6: fix NOREF dst use in seg6 and rpl lwtunnels commit f9c52a6ba9780bd27e0bf4c044fd91c13c778b6e upstream. seg6_input_core() and rpl_input() call ip6_route_input() which sets a NOREF dst on the skb, then pass it to dst_cache_set_ip6() invoking dst_hold() unconditionally. On PREEMPT_RT, ksoftirqd is preemptible and a higher-priority task can release the underlying pcpu_rt between the lookup and the caching through a concurrent FIB lookup on a shared nexthop. 
Simplified race sequence: ksoftirqd/X higher-prio task (same CPU X) ----------- -------------------------------- seg6_input_core(,skb)/rpl_input(skb) dst_cache_get() -> miss ip6_route_input(skb) -> ip6_pol_route(,skb,flags) [RT6_LOOKUP_F_DST_NOREF in flags] -> FIB lookup resolves fib6_nh [nhid=N route] -> rt6_make_pcpu_route() [creates pcpu_rt, refcount=1] pcpu_rt->sernum = fib6_sernum [fib6_sernum=W] -> cmpxchg(fib6_nh.rt6i_pcpu, NULL, pcpu_rt) [slot was empty, store succeeds] -> skb_dst_set_noref(skb, dst) [dst is pcpu_rt, refcount still 1] rt_genid_bump_ipv6() -> bumps fib6_sernum [fib6_sernum from W to Z] ip6_route_output() -> ip6_pol_route() -> FIB lookup resolves fib6_nh [nhid=N] -> rt6_get_pcpu_route() pcpu_rt->sernum != fib6_sernum [W <> Z, stale] -> prev = xchg(rt6i_pcpu, NULL) -> dst_release(prev) [prev is pcpu_rt, refcount 1->0, dead] dst = skb_dst(skb) [dst is the dead pcpu_rt] dst_cache_set_ip6(dst) -> dst_hold() on dead dst -> WARN / use-after-free For the race to occur, ksoftirqd must be preemptible (PREEMPT_RT without PREEMPT_RT_NEEDS_BH_LOCK) and a concurrent task must be able to release the pcpu_rt. Shared nexthop objects provide such a path, as two routes pointing to the same nhid share the same fib6_nh and its rt6i_pcpu entry. Fix seg6_input_core() and rpl_input() by calling skb_dst_force() after ip6_route_input() to force the NOREF dst into a refcounted one before caching. The output path is not affected as ip6_route_output() already returns a refcounted dst. 
Fixes: af4a2209b134 ("ipv6: sr: use dst_cache in seg6_input") Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Cc: stable@vger.kernel.org Signed-off-by: Andrea Mayer Reviewed-by: Simon Horman Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260421094735.20997-1-andrea.mayer@uniroma2.it Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv6/rpl_iptunnel.c | 9 +++++++++ net/ipv6/seg6_iptunnel.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index c7942cf655671..4e10adcd70e89 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -287,7 +287,16 @@ static int rpl_input(struct sk_buff *skb) if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 351a0ed7441cc..ead677bca4901 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -500,7 +500,16 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { From 6f820139d16a4c9865a145d4a9cf9c92cc632c14 Mon Sep 17 00:00:00 2001 From: Kai Ma Date: Wed, 22 Apr 2026 22:54:18 +0800 Subject: [PATCH 277/554] netfilter: reject zero shift in nft_bitwise commit fe11e5c40817b84abaa5d83bfb6586d8412bfd07 upstream. Reject zero shift operands for nft_bitwise left and right shift expressions during initialization. 
The carry propagation logic computes the carry from the adjacent 32-bit word using BITS_PER_TYPE(u32) - shift. A zero shift operand turns this into a 32-bit shift, which is undefined behaviour. Reject zero shift operands in the control plane, alongside the existing check for values greater than or equal to 32, so malformed rules never reach the packet path. Fixes: 567d746b55bc ("netfilter: bitwise: add support for shifts.") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Kai Ma Signed-off-by: Ren Wei Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_bitwise.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d550910aabec9..af990c600745b 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -196,7 +196,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (err < 0) return err; - if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + if (!priv->data.data[0] || + priv->data.data[0] >= BITS_PER_TYPE(u32)) { nft_data_release(&priv->data, desc.type); return -EINVAL; } From 52a7c9a2823da0d06b1a659b1bea1290a79a5044 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 13 Apr 2026 07:09:15 -0500 Subject: [PATCH 278/554] ipmi:ssif: Remove unnecessary indention commit 91eb7ec7261254b6875909df767185838598e21e upstream. A section was in {} that didn't need to be, move the variable definition to the top and set the indentation properly. 
Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 039d5d26b5de2..5697b13449703 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1662,6 +1662,7 @@ static int ssif_probe(struct i2c_client *client) int len = 0; int i; u8 slave_addr = 0; + unsigned int thread_num; struct ssif_addr_info *addr_info = NULL; mutex_lock(&ssif_infos_mutex); @@ -1880,22 +1881,17 @@ static int ssif_probe(struct i2c_client *client) ssif_info->handlers.request_events = request_events; ssif_info->handlers.set_need_watch = ssif_set_need_watch; - { - unsigned int thread_num; - - thread_num = ((i2c_adapter_id(ssif_info->client->adapter) - << 8) | - ssif_info->client->addr); - init_completion(&ssif_info->wake_thread); - ssif_info->thread = kthread_run(ipmi_ssif_thread, ssif_info, - "kssif%4.4x", thread_num); - if (IS_ERR(ssif_info->thread)) { - rv = PTR_ERR(ssif_info->thread); - dev_notice(&ssif_info->client->dev, - "Could not start kernel thread: error %d\n", - rv); - goto out; - } + thread_num = ((i2c_adapter_id(ssif_info->client->adapter) << 8) | + ssif_info->client->addr); + init_completion(&ssif_info->wake_thread); + ssif_info->thread = kthread_run(ipmi_ssif_thread, ssif_info, + "kssif%4.4x", thread_num); + if (IS_ERR(ssif_info->thread)) { + rv = PTR_ERR(ssif_info->thread); + dev_notice(&ssif_info->client->dev, + "Could not start kernel thread: error %d\n", + rv); + goto out; } dev_set_drvdata(&ssif_info->client->dev, ssif_info); From 97d35052a816875ad16bc37ae163ebc873b2928d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 21 Apr 2026 06:50:22 -0500 Subject: [PATCH 279/554] ipmi:ssif: NULL thread on error commit a8aebe93a4938c0ca1941eeaae821738f869be3d upstream. 
Cleanup code was checking the thread for NULL, but it was possibly a PTR_ERR() in one spot. Spotted with static analysis. Link: https://sourceforge.net/p/openipmi/mailman/message/59324676/ Fixes: 75c486cb1bca ("ipmi:ssif: Clean up kthread on errors") Cc: # 91eb7ec72612: ipmi:ssif: Remove unnecessary indention Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 5697b13449703..4d4fbf6931ffc 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1888,6 +1888,7 @@ static int ssif_probe(struct i2c_client *client) "kssif%4.4x", thread_num); if (IS_ERR(ssif_info->thread)) { rv = PTR_ERR(ssif_info->thread); + ssif_info->thread = NULL; dev_notice(&ssif_info->client->dev, "Could not start kernel thread: error %d\n", rv); From ce685b6a03dc0270f78bc7aaf2c9ada9cbd45a86 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 May 2026 06:12:03 +0200 Subject: [PATCH 280/554] Linux 6.18.27 Link: https://lore.kernel.org/r/20260504135142.929052779@linuxfoundation.org Tested-by: Brett A C Sheffield Tested-by: Peter Schneider Tested-by: Wentao Guan Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Miguel Ojeda Tested-by: Mark Brown Tested-by: Shuah Khan Tested-by: Dileep Malepu Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3eefbf12ed92..eb796fab33804 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 26 +SUBLEVEL = 27 EXTRAVERSION = NAME = Baby Opossum Posse From 71a1d9d985d26716f74d21f18ee8cac821b06e97 Mon Sep 17 00:00:00 2001 From: Kuan-Ting Chen Date: Mon, 4 May 2026 23:27:12 +0800 Subject: [PATCH 281/554] xfrm: esp: avoid in-place decrypt on shared skb frags commit 
f4c50a4034e62ab75f1d5cdd191dd5f9c77fdff4 upstream. MSG_SPLICE_PAGES can attach pages from a pipe directly to an skb. TCP marks such skbs with SKBFL_SHARED_FRAG after skb_splice_from_iter(), so later paths that may modify packet data can first make a private copy. The IPv4/IPv6 datagram append paths did not set this flag when splicing pages into UDP skbs. That leaves an ESP-in-UDP packet made from shared pipe pages looking like an ordinary uncloned nonlinear skb. ESP input then takes the no-COW fast path for uncloned skbs without a frag_list and decrypts in place over data that is not owned privately by the skb. Mark IPv4/IPv6 datagram splice frags with SKBFL_SHARED_FRAG, matching TCP. Also make ESP input fall back to skb_cow_data() when the flag is present, so ESP does not decrypt externally backed frags in place. Private nonlinear skb frags still use the existing fast path. This intentionally does not change ESP output. In esp_output_head(), the path that appends the ESP trailer to existing skb tailroom without calling skb_cow_data() is not reachable for nonlinear skbs: skb_tailroom() returns zero when skb->data_len is nonzero, while ESP tailen is positive. Thus ESP output will either use the separate destination-frag path or fall back to skb_cow_data(). 
Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Fixes: 7da0dde68486 ("ip, udp: Support MSG_SPLICE_PAGES") Fixes: 6d8192bd69bb ("ip6, udp6: Support MSG_SPLICE_PAGES") Reported-by: Hyunwoo Kim Reported-by: Kuan-Ting Chen Tested-by: Hyunwoo Kim Cc: stable@vger.kernel.org Signed-off-by: Kuan-Ting Chen Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/esp4.c | 3 ++- net/ipv4/ip_output.c | 2 ++ net/ipv6/esp6.c | 3 ++- net/ipv6/ip6_output.c | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bcdef654..6a5febbdbee49 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ff11d3a85a362..7c005263262ff 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,6 +1233,8 @@ static int __ip_append_data(struct sock *sk, if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313734f8c..9c06c5a1419dc 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f904739e99b90..f5ca0267e7706 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ 
-1777,6 +1777,8 @@ static int __ip6_append_data(struct sock *sk, if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; From c31b94a4231c974dca12e6b83f2af1d1d95dfc5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 8 May 2026 08:40:23 +0200 Subject: [PATCH 282/554] Linux 6.18.28 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eb796fab33804..8d068587e6bd5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 27 +SUBLEVEL = 28 EXTRAVERSION = NAME = Baby Opossum Posse From 3eae0f4f9f7206a4801efa5e0235c25bbd5a412c Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Fri, 8 May 2026 17:53:09 +0900 Subject: [PATCH 283/554] rxrpc: Also unshare DATA/RESPONSE packets when paged frags are present commit aa54b1d27fe0c2b78e664a34fd0fdf7cd1960d71 upstream. The DATA-packet handler in rxrpc_input_call_event() and the RESPONSE handler in rxrpc_verify_response() copy the skb to a linear one before calling into the security ops only when skb_cloned() is true. An skb that is not cloned but still carries externally-owned paged fragments (e.g. SKBFL_SHARED_FRAG set by splice() into a UDP socket via __ip_append_data, or a chained skb_has_frag_list()) falls through to the in-place decryption path, which binds the frag pages directly into the AEAD/skcipher SGL via skb_to_sgvec(). Extend the gate to also unshare when skb_has_frag_list() or skb_has_shared_frag() is true. This catches the splice-loopback vector and other externally-shared frag sources while preserving the zero-copy fast path for skbs whose frags are kernel-private (e.g. NIC page_pool RX, GRO). The OOM/trace handling already in place is reused. 
Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") Cc: stable@vger.kernel.org Signed-off-by: Hyunwoo Kim Reviewed-by: Jiayuan Chen Acked-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_event.c | 4 +++- net/rxrpc/conn_event.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fdd683261226c..2b19b252225e5 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -334,7 +334,9 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.securityIndex != 0 && - skb_cloned(skb)) { + (skb_cloned(skb) || + skb_has_frag_list(skb) || + skb_has_shared_frag(skb))) { /* Unshare the packet so that it can be * modified by in-place decryption. */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a2130d25aaa9b..442414d90ba1c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -245,7 +245,8 @@ static int rxrpc_verify_response(struct rxrpc_connection *conn, { int ret; - if (skb_cloned(skb)) { + if (skb_cloned(skb) || skb_has_frag_list(skb) || + skb_has_shared_frag(skb)) { /* Copy the packet if shared so that we can do in-place * decryption. 
*/ From d31a849ff5011dad5c271b53819a0b279e367d68 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 May 2026 08:20:52 +0200 Subject: [PATCH 284/554] Linux 6.18.29 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8d068587e6bd5..ca41f54bea2ee 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 28 +SUBLEVEL = 29 EXTRAVERSION = NAME = Baby Opossum Posse From 00d91bfdce5033f5d9b4915638ae9b0553848b5d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 11 Apr 2026 14:06:00 +0200 Subject: [PATCH 285/554] scsi: target: configfs: Bound snprintf() return in tg_pt_gp_members_show() commit 772a896a56e0e3ef9424a025cec9176f9d8f4552 upstream. target_tg_pt_gp_members_show() formats LUN paths with snprintf() into a 256-byte stack buffer, then will memcpy() cur_len bytes from that buffer. snprintf() returns the length the output would have had, which can exceed the buffer size when the fabric WWN is long because iSCSI IQN names can be up to 223 bytes. The check at the memcpy() site only guards the destination page write, not the source read, so memcpy() will read past the stack buffer and copy adjacent stack contents to the sysfs reader, which when CONFIG_FORTIFY_SOURCE is enabled, fortify_panic() will be triggered. Commit 27e06650a5ea ("scsi: target: target_core_configfs: Add length check to avoid buffer overflow") added the same bound to the target_lu_gp_members_show() but the tg_pt_gp variant was missed so resolve that here. Cc: Martin K. Petersen Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6") Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026041159-garter-theft-3be0@gregkh Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index a2bd2e81d2c68..7e101344e27e0 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3170,7 +3170,7 @@ static ssize_t target_tg_pt_gp_members_show(struct config_item *item, config_item_name(&lun->lun_group.cg_item)); cur_len++; /* Extra byte for NULL terminator */ - if ((cur_len + len) > PAGE_SIZE) { + if (cur_len > TG_PT_GROUP_NAME_BUF || (cur_len + len) > PAGE_SIZE) { pr_warn("Ran out of lu_gp_show_attr" "_members buffer\n"); break; From 3d37d2165df9504ea99d9e6181552dc4d2d1ab37 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 12:39:51 -0500 Subject: [PATCH 286/554] ipmi: Add limits to event and receive message requests commit c4cca236968683eb0d59abfb12d5c7e4d8514227 upstream. The driver would just fetch events and receive messages until the BMC said it was done. To avoid issues with BMCs that never say they are done, add a limit of 10 fetches at a time. In addition, an si interface has an attn state it can return from the hardware which is supposed to cause a flag fetch to see if the driver needs to fetch events or message or a few other things. If the attn bit gets stuck, it's a similar problem. So allow messages in between flag fetches so the driver itself doesn't get stuck. This is a more general fix than the previous fix for the specific bad BMC, but should fix the more general issue of a BMC that won't stop saying it has data. This has been there from the beginning of the driver. It's not a bug per-se, but it is accounting for bugs in BMCs. 
Reported-by: Matt Fleming Closes: https://lore.kernel.org/lkml/20260415115930.3428942-1-matt@readmodwrite.com/ Fixes: <1da177e4c3f4> ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 54 +++++++++++++++++++++++++------- drivers/char/ipmi/ipmi_ssif.c | 23 ++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index f3a4fa98b1efd..f67b7ffe1050c 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -168,6 +168,10 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + bool last_was_flag_fetch; + /* Does the BMC have an event buffer? */ bool has_event_buffer; @@ -411,7 +415,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_MESSAGES; + if (smi_info->si_state != SI_GETTING_MESSAGES) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_MESSAGES; + } } static void start_getting_events(struct smi_info *smi_info) @@ -422,7 +429,10 @@ static void start_getting_events(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_EVENTS; + if (smi_info->si_state != SI_GETTING_EVENTS) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_EVENTS; + } } /* @@ -596,6 +606,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->si_state = SI_NORMAL; } else { smi_info->msg_flags = msg[3]; + smi_info->last_was_flag_fetch = true; handle_flags(smi_info); } break; @@ -641,6 +652,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, 
events); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -679,6 +695,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, incoming_messages); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -820,6 +841,26 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, goto out; } + /* + * If we are currently idle, or if the last thing that was + * done was a flag fetch and there is a message pending, try + * to start the next message. + * + * We do the waiting message check to avoid a stuck flag + * completely wedging the driver. Let a message through + * in between flag operations if that happens. + */ + if (si_sm_result == SI_SM_IDLE || + (si_sm_result == SI_SM_ATTN && smi_info->waiting_msg && + smi_info->last_was_flag_fetch)) { + smi_info->last_was_flag_fetch = false; + smi_inc_stat(smi_info, idles); + + si_sm_result = start_next_msg(smi_info); + if (si_sm_result != SI_SM_IDLE) + goto restart; + } + /* * We prefer handling attn over new messages. But don't do * this if there is not yet an upper layer to handle anything. @@ -847,15 +888,6 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, } } - /* If we are currently idle, try to start the next message. 
*/ - if (si_sm_result == SI_SM_IDLE) { - smi_inc_stat(smi_info, idles); - - si_sm_result = start_next_msg(smi_info); - if (si_sm_result != SI_SM_IDLE) - goto restart; - } - if ((si_sm_result == SI_SM_IDLE) && (atomic_read(&smi_info->req_events))) { /* diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 4d4fbf6931ffc..164ef375b3643 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -225,6 +225,9 @@ struct ssif_info { bool has_event_buffer; bool supports_alert; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + /* * Used to tell what we should do with alerts. If we are * waiting on a response, read the data immediately. @@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_EVENTS; + if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_EVENTS; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. 
*/ + ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + handle_flags(ssif_info, flags); ssif_inc_stat(ssif_info, events); deliver_recv_msg(ssif_info, msg); @@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + ssif_inc_stat(ssif_info, incoming_messages); handle_flags(ssif_info, flags); deliver_recv_msg(ssif_info, msg); From 42432b579a594b66ac32e5e7b7c26e6bc578ec89 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 12:50:09 -0500 Subject: [PATCH 287/554] ipmi: Check event message buffer response for bad data commit 36920f30e78e69df01f9691c470b6f3ba8aebf98 upstream. The event message buffer response data size got checked later when processing, but check it right after the response comes back. It appears some BMCs may return an empty message instead of an error when fetching events. There are apparently some new BMCs that make this error, so we need to compensate. 
Reported-by: Matt Fleming Closes: https://lore.kernel.org/lkml/20260415115930.3428942-1-matt@readmodwrite.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index f67b7ffe1050c..565167e0b7163 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -642,7 +642,13 @@ static void handle_transaction_done(struct smi_info *smi_info) */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; - if (msg->rsp[2] != 0) { + /* + * It appears some BMCs, with no event data, return no + * data in the message and not a 0x80 error as the + * spec says they should. Shut down processing if + * the data is not the right length. + */ + if (msg->rsp[2] != 0 || msg->rsp_size != 19) { /* Error getting event, probably done. */ msg->done(msg); From bc13fce9eeec88c4950924754c3347c6dc66ff4c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 13:02:18 -0500 Subject: [PATCH 288/554] ipmi:si: Return state to normal if message allocation fails commit 09dd798270ff582d7309f285d4aaf5dbebae01cb upstream. There were places where nothing would get started if a message allocation failed, so the driver needs to return to normal state. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 565167e0b7163..89af403fd9944 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -498,15 +498,19 @@ static void handle_flags(struct smi_info *smi_info) } else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) { /* Messages available. 
*/ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_msg_queue(smi_info); } else if (smi_info->msg_flags & EVENT_MSG_BUFFER_FULL) { /* Events available. */ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_events(smi_info); } else if (smi_info->msg_flags & OEM_DATA_AVAIL && From da9b065cedfd3b574f229d5be594e6aa47a27ae6 Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Sun, 3 May 2026 20:51:10 -0700 Subject: [PATCH 289/554] fbdev: udlfb: add vm_ops to dlfb_ops_mmap to prevent use-after-free commit 8de779dc40d35d39fa07387b6f921eb11df0f511 upstream. dlfb_ops_mmap() uses remap_pfn_range() to map vmalloc framebuffer pages to userspace but sets no vm_ops on the VMA. This means the kernel cannot track active mmaps. When dlfb_realloc_framebuffer() replaces the backing buffer via FBIOPUT_VSCREENINFO, existing mmap PTEs are not invalidated. On USB disconnect, dlfb_ops_destroy() calls vfree() on the old pages while userspace PTEs still reference them, resulting in a use-after-free: the process retains read/write access to freed kernel pages. Add vm_operations_struct with open/close callbacks that maintain an atomic mmap_count on struct dlfb_data. In dlfb_realloc_framebuffer(), check mmap_count and return -EBUSY if the buffer is currently mapped, preventing buffer replacement while userspace holds stale PTEs. Tested with PoC using dummy_hcd + raw_gadget USB device emulation. 
Signed-off-by: Rajat Gupta Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/udlfb.c | 31 ++++++++++++++++++++++++++++++- include/video/udlfb.h | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index 28e6d75e13ed3..70ff38f35096a 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -321,12 +321,32 @@ static int dlfb_set_video_mode(struct dlfb_data *dlfb, return retval; } +static void dlfb_vm_open(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_inc(&dlfb->mmap_count); +} + +static void dlfb_vm_close(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_dec(&dlfb->mmap_count); +} + +static const struct vm_operations_struct dlfb_vm_ops = { + .open = dlfb_vm_open, + .close = dlfb_vm_close, +}; + static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) { unsigned long start = vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start; unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long page, pos; + struct dlfb_data *dlfb = info->par; if (info->fbdefio) return fb_deferred_io_mmap(info, vma); @@ -358,6 +378,9 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } + vma->vm_ops = &dlfb_vm_ops; + vma->vm_private_data = dlfb; + atomic_inc(&dlfb->mmap_count); return 0; } @@ -1176,7 +1199,6 @@ static void dlfb_deferred_vfree(struct dlfb_data *dlfb, void *mem) /* * Assumes &info->lock held by caller - * Assumes no active clients have framebuffer open */ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info, u32 new_len) { @@ -1188,6 +1210,13 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info new_len = PAGE_ALIGN(new_len); if (new_len > old_len) { + if 
(atomic_read(&dlfb->mmap_count) > 0) { + dev_warn(info->dev, + "refusing realloc: %d active mmaps\n", + atomic_read(&dlfb->mmap_count)); + return -EBUSY; + } + /* * Alloc system memory for virtual framebuffer */ diff --git a/include/video/udlfb.h b/include/video/udlfb.h index 58fb5732831a4..ab34790d57ecd 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -56,6 +56,7 @@ struct dlfb_data { spinlock_t damage_lock; struct work_struct damage_work; struct fb_ops ops; + atomic_t mmap_count; /* blit-only rendering path metrics, exposed through sysfs */ atomic_t bytes_rendered; /* raw pixel-bytes driver asked to render */ atomic_t bytes_identical; /* saved effort with backbuffer comparison */ From 7f0a53c2b94ca8ceb6e3203003bb54507daa25c9 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Mon, 13 Apr 2026 21:53:43 +0800 Subject: [PATCH 290/554] ACPI: scan: Use acpi_dev_put() in object add error paths commit 9c0acc169ac71535477caedea8315f7041c5f07c upstream. After acpi_init_device_object(), the lifetime of struct acpi_device is managed by the driver core through reference counting. Both acpi_add_power_resource() and acpi_add_single_object() call acpi_init_device_object() and then invoke acpi_device_add(). If that fails, their error paths call the release callback directly instead of dropping the device reference through acpi_dev_put(). This bypasses the normal device lifetime rules and frees the object without releasing the reference acquired by device_initialize(), which may lead to a refcount leak. The issue was identified by a static analysis tool I developed and confirmed by manual review. Fix both error paths by using acpi_dev_put() and let the release callback handle the final cleanup. Fixes: 781d737c7466 ("ACPI: Drop power resources driver") Fixes: 718fb0de8ff88 ("ACPI: fix NULL bug for HID/UID string") Cc: All applicable Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260413135343.2884481-1-lgs201920130244@gmail.com Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/power.c | 2 +- drivers/acpi/scan.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 7da5ae5594a72..6d16740e1d70a 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -991,7 +991,7 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle) return device; err: - acpi_release_power_resource(&device->dev); + acpi_dev_put(device); return NULL; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ef16d58b29499..50f60da6cf27f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1910,7 +1910,7 @@ static int acpi_add_single_object(struct acpi_device **child, result = acpi_device_add(device); if (result) { - acpi_device_release(&device->dev); + acpi_dev_put(device); return result; } From 42f83350394a39630db8f45034952417ed415d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Sch=C3=A4r?= Date: Sat, 11 Apr 2026 11:26:06 +0200 Subject: [PATCH 291/554] ACPI: video: Add backlight=native quirk for Dell OptiPlex 7770 AIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ad7997f5a01af6f711fe6b6a2df578b964109d49 upstream. The Dell OptiPlex 7770 AIO needs the same quirk as the 7760 AIO. The backlight can be controlled with the native controller, intel_backlight, but not with dell_uart_backlight. I dumped the DSDT using acpidump, acpixtract and iasl, and confirmed that it contains the DELL0501 device. When loading the dell_uart_backlight driver with `rmmod dell_uart_backlight`, `modprobe dell_uart_backlight dyndbg`, it reports "Firmware version: GL_Re_V18". Fixes: cd8e468efb4f ("ACPI: video: Add Dell UART backlight controller detection") Cc: All applicable Signed-off-by: Jan Schär Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260411092606.47925-1-jan@jschaer.ch Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 4cf74f173c785..4a2132ae28b4d 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -878,6 +878,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7760 AIO"), }, }, + { + .callback = video_detect_force_native, + /* Dell OptiPlex 7770 AIO */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7770 AIO"), + }, + }, /* * Models which have nvidia-ec-wmi support, but should not use it. From 70446f52251f241b1f215f9b913c5b8584760c68 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 17 Apr 2026 12:01:12 +0800 Subject: [PATCH 292/554] ACPI: CPPC: Fix related_cpus inconsistency during CPU hotplug commit 75141a770f4f8225d316f6c7e146723a32e9720e upstream. When concurrently bringing up and down two SMT threads of a physical core, many warning call traces occur as below: The issue timeline is as follows: 1. When the system starts, cpufreq: CPU: 220, policy->related_cpus: 220-221, policy->cpus: 220-221 2. Offline CPU 220 and CPU 221. 3. Online CPU 220 - CPU 221 is now offline, as acpi_get_psd_map() use for_each_online_cpu(), so the cpu_data->shared_cpu_map, policy->cpus, and related_cpus has only CPU 220. cpufreq: CPU: 220, policy->related_cpus: 220, policy->cpus: 220 4. Offline CPU 220 5. Online CPU 221, the below call trace occurs: - Since CPU 220 and CPU 221 share one policy, and policy->related_cpus = 220 after step 3, so CPU 221 is not in policy->related_cpus but per_cpu(cpufreq_cpu_data, cpu221) is not NULL. After reverting commit 56eb0c0ed345 ("ACPI: CPPC: Fix remaining for_each_possible_cpu() to use online CPUs"), the issue disappeared. The _PSD (P-State Dependency) defines the hardware-level dependency of frequency control across CPU cores. 
Since this relationship is a physical attribute of the hardware topology, it remains constant regardless of the online or offline status of the CPUs. Using for_each_online_cpu() in acpi_get_psd_map() is problematic. If a CPU is offline, it will be excluded from the shared_cpu_map. Consequently, if that CPU is brought online later, the kernel will fail to recognize it as part of any shared frequency domain. Switch back to for_each_possible_cpu() to ensure that all cores defined in the ACPI tables are correctly mapped into their respective performance domains from the start. This aligns with the logic of policy->related_cpus, which must encompass all potentially available cores in the domain to prevent logic gaps during CPU hotplug operations. To resolve the original issue regarding the "nosmt" or "nosmt=force" boot parameter, as send_pcc_cmd() function already does if (!desc) continue, so reverting that loop back to for_each_possible_cpu() is ok, only need to change the match_cpc_ptr NULL case in acpi_get_psd_map() to continue as Sean suggested. How to reproduce, on arm64 machine with SMT support which use acpi cppc cpufreq driver: bash test.sh 220 & bash test.sh 221 & The test.sh is as below: while true do echo 0 > /sys/devices/system/cpu/cpu${1}/online sleep 0.5 cat /sys/devices/system/cpu/cpu${1}/cpufreq/related_cpus echo 1 > /sys/devices/system/cpu/cpu${1}/online cat /sys/devices/system/cpu/cpu${1}/cpufreq/related_cpus done CPU: 221 PID: 1119 Comm: cpuhp/221 Kdump: loaded Not tainted 6.6.0debug+ #5 Hardware name: To be filled by O.E.M. 
S920X20/BC83AMDA01-7270Z, BIOS 20.39 09/04/2024 pstate: a1400009 (NzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : cpufreq_online+0x8ac/0xa90 lr : cpuhp_cpufreq_online+0x18/0x30 sp : ffff80008739bce0 x29: ffff80008739bce0 x28: 0000000000000000 x27: ffff28400ca32200 x26: 0000000000000000 x25: 0000000000000003 x24: ffffd483503ff000 x23: ffffd483504051a0 x22: ffffd48350024a00 x21: 00000000000000dd x20: 000000000000001d x19: ffff28400ca32000 x18: 0000000000000000 x17: 0000000000000020 x16: ffffd4834e6a3fc8 x15: 0000000000000020 x14: 0000000000000008 x13: 0000000000000001 x12: 00000000ffffffff x11: 0000000000000040 x10: ffffd48350430728 x9 : ffffd4834f087c78 x8 : 0000000000000001 x7 : ffff2840092bdf00 x6 : ffffd483504264f0 x5 : ffffd48350405000 x4 : ffff283f7f95cc60 x3 : 0000000000000000 x2 : ffff53bc2f94b000 x1 : 00000000000000dd x0 : 0000000000000000 Call trace: cpufreq_online+0x8ac/0xa90 cpuhp_cpufreq_online+0x18/0x30 cpuhp_invoke_callback+0x128/0x580 cpuhp_thread_fun+0x110/0x1b0 smpboot_thread_fn+0x140/0x190 kthread+0xec/0x100 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- Cc: All applicable Fixes: 56eb0c0ed345 ("ACPI: CPPC: Fix remaining for_each_possible_cpu() to use online CPUs") Co-developed-by: Sean Kelley Signed-off-by: Sean Kelley Signed-off-by: Jinjie Ruan [ rjw: Changelog edits ] Link: https://patch.msgid.link/20260417040112.3727756-1-ruanjinjie@huawei.com Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index b59b0100d03c5..ad763a5bf3af5 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -362,7 +362,7 @@ static int send_pcc_cmd(int pcc_ss_id, u16 cmd) end: if (cmd == CMD_WRITE) { if (unlikely(ret)) { - for_each_online_cpu(i) { + for_each_possible_cpu(i) { struct cpc_desc *desc = per_cpu(cpc_desc_ptr, i); if (!desc) @@ -524,13 +524,13 @@ int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data) else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY; - for_each_online_cpu(i) { + for_each_possible_cpu(i) { if (i == cpu) continue; match_cpc_ptr = per_cpu(cpc_desc_ptr, i); if (!match_cpc_ptr) - goto err_fault; + continue; match_pdomain = &(match_cpc_ptr->domain_info); if (match_pdomain->domain != pdomain->domain) From 540fa6a86b912ed7b4794e885865289f02f4e87d Mon Sep 17 00:00:00 2001 From: Shivam Kalra Date: Sun, 26 Apr 2026 19:38:41 +0530 Subject: [PATCH 293/554] ACPI: video: force native backlight on HP OMEN 16 (8A44) commit 4b506ea5351a1f5937ac632a4a5c35f6f796cc41 upstream. The HP OMEN 16 Gaming Laptop (board name 8A44) has a mux-less hybrid GPU configuration with AMD Rembrandt (Radeon 680M) and NVIDIA GA104 (RTX 3070 Ti). The internal eDP panel is wired to the AMD iGPU. When Nouveau loads without GSP firmware, the ACPI video backlight device (acpi_video0) gets registered alongside the native AMD backlight (amdgpu_bl2). In this state, writes to amdgpu_bl2 update the software brightness value but fail to change the physical panel brightness. Force native backlight to prevent acpi_video0 from registering. Confirmed that booting with acpi_backlight=native resolves the issue. 
Cc: All applicable Signed-off-by: Shivam Kalra Link: https://patch.msgid.link/20260426-omen-16-backlight-fix-v1-1-62364f268ea6@zohomail.in Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 4a2132ae28b4d..2c120ade8f51a 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -907,6 +907,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"), }, }, + { + .callback = video_detect_force_native, + /* HP OMEN Gaming Laptop 16-n0xxx */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN by HP Gaming Laptop 16-n0xxx"), + }, + }, /* * x86 android tablets which directly control the backlight through From 2c5b8eeea006eb694c81631cd5713d494b80be90 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 13 Apr 2026 20:06:01 +0100 Subject: [PATCH 294/554] tracepoint: balance regfunc() on func_add() failure in tracepoint_add_func() commit fad217e16fded7f3c09f8637b0f6a224d58b5f2e upstream. When a tracepoint goes through the 0 -> 1 transition, tracepoint_add_func() invokes the subsystem's ext->regfunc() before attempting to install the new probe via func_add(). If func_add() then fails (for example, when allocate_probes() cannot allocate a new probe array under memory pressure and returns -ENOMEM), the function returns the error without calling the matching ext->unregfunc(), leaving the side effects of regfunc() behind with no installed probe to justify them. For syscall tracepoints this is particularly unpleasant: syscall_regfunc() bumps sys_tracepoint_refcount and sets SYSCALL_TRACEPOINT on every task. After a leaked failure, the refcount is stuck at a non-zero value with no consumer, and every task continues paying the syscall trace entry/exit overhead until reboot. 
Other subsystems providing regfunc()/unregfunc() pairs exhibit similarly scoped persistent state. Mirror the existing 1 -> 0 cleanup and call ext->unregfunc() in the func_add() error path, gated on the same condition used there so the unwind is symmetric with the registration. Fixes: 8cf868affdc4 ("tracing: Have the reg function allow to fail") Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260413190601.21993-1-devnexen@gmail.com Signed-off-by: David Carlier Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/tracepoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 62719d2941c90..1abaeb32a8e1e 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -291,6 +291,8 @@ static int tracepoint_add_func(struct tracepoint *tp, lockdep_is_held(&tracepoints_mutex)); old = func_add(&tp_funcs, func, prio); if (IS_ERR(old)) { + if (tp->ext && tp->ext->unregfunc && !static_key_enabled(&tp->key)) + tp->ext->unregfunc(); WARN_ON_ONCE(warn && PTR_ERR(old) != -ENOMEM); return PTR_ERR(old); } From 9a2d03d324b2c74a6014d607cec7555036f30cb0 Mon Sep 17 00:00:00 2001 From: Sina Hassani Date: Fri, 10 Apr 2026 11:32:44 -0700 Subject: [PATCH 295/554] iommufd: Fix a race with concurrent allocation and unmap commit 8602018b1f17fbdaa5e5d79f4c8603ad20640c12 upstream. iopt_unmap_iova_range() releases the lock on iova_rwsem inside the loop body when getting to the more expensive unmap operations. This is fine on its own, except the loop condition is based on the first area that matches the unmap address range. If a concurrent call to map picks an area that was unmapped in previous iterations, the loop mistakenly tries to unmap it. This is reproducible by having one userspace thread map buffers and pass them to another thread that unmaps them. The problem manifests as EBUSY errors with single page mappings. 
Fix this by advancing the start pointer after unmapping an area. This ensures each iteration only examines the IOVA range that remains mapped, which is guaranteed not to have overlaps. Cc: stable@vger.kernel.org Fixes: 51fe6141f0f6 ("iommufd: Data structure to provide IOVA to PFN mapping") Link: https://patch.msgid.link/r/CAAJpGJSR4r_ds1JOjmkqHtsBPyxu8GntoeW08Sk5RNQPmgi+tg@mail.gmail.com Signed-off-by: Sina Hassani Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommufd/io_pagetable.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 75d60f2ad9008..a4d7a9d435603 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -777,6 +777,16 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, unmapped_bytes += area_last - area_first + 1; down_write(&iopt->iova_rwsem); + + /* + * After releasing the iova_rwsem concurrent allocation could + * place new areas at IOVAs we have already unmapped. Keep + * moving the start of the search forward to ignore the area + * already unmapped. + */ + if (area_last >= last) + break; + start = area_last + 1; } out_unlock_iova: From 0f0c0c1397a42aacaacae828206ee1b921623952 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Mar 2026 14:52:41 +0000 Subject: [PATCH 296/554] ASoC: SOF: Don't allow pointer operations on unconfigured streams commit c5b6285aae050ff1c3ea824ca3d88ac4be1e69c8 upstream. When reporting the pointer for a compressed stream we report the current I/O frame position by dividing the position by the number of channels multiplied by the number of container bytes. These values default to 0 and are only configured as part of setting the stream parameters so this allows a divide by zero to be configured. 
Validate that they are non zero, returning an error if not Fixes: c1a731c71359 ("ASoC: SOF: compress: Add support for computing timestamps") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260326-asoc-compress-tstamp-params-v1-1-3dc735b3d599@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sof/compress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/compress.c b/sound/soc/sof/compress.c index 90b932ae3bab2..593b8c7474846 100644 --- a/sound/soc/sof/compress.c +++ b/sound/soc/sof/compress.c @@ -371,6 +371,9 @@ static int sof_compr_pointer(struct snd_soc_component *component, if (!spcm) return -EINVAL; + if (!sstream->channels || !sstream->sample_container_bytes) + return -EBUSY; + tstamp->sampling_rate = sstream->sampling_rate; tstamp->copied_total = sstream->copied_total; tstamp->pcm_io_frames = div_u64(spcm->stream[cstream->direction].posn.dai_posn, From 8c4339dbab499d37ccd3f996989413befbf5f086 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Wed, 25 Feb 2026 17:47:22 +0800 Subject: [PATCH 297/554] wifi: mt76: mt7925: fix incorrect TLV length in CLC command commit 62e037aa8cf5a69b7ea63336705a35c897b9db2b upstream. The previous implementation of __mt7925_mcu_set_clc() set the TLV length field (.len) incorrectly during CLC command construction. The length was initialized as sizeof(req) - 4, regardless of the actual segment length. This could cause the WiFi firmware to misinterpret the command payload, resulting in command execution errors. This patch moves the TLV length assignment to after the segment is selected, and sets .len to sizeof(req) + seg->len - 4, matching the actual command content. This ensures the firmware receives the correct TLV length and parses the command properly. 
Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Acked-by: Sean Wang Link: https://patch.msgid.link/f56ae0e705774dfa8aab3b99e5bbdc92cd93523e.1772011204.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index c12b71b71cfc7..a411838147ec5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -3324,7 +3324,6 @@ __mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, u8 rsvd[64]; } __packed req = { .tag = cpu_to_le16(0x3), - .len = cpu_to_le16(sizeof(req) - 4), .idx = idx, .env = env_cap, @@ -3353,6 +3352,7 @@ __mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, memcpy(req.type, rule->type, 2); req.size = cpu_to_le16(seg->len); + req.len = cpu_to_le16(sizeof(req) + seg->len - 4); dev->phy.clc_chan_conf = clc->ver == 1 ? 0xff : rule->flag; skb = __mt76_mcu_msg_alloc(&dev->mt76, &req, le16_to_cpu(req.size) + sizeof(req), From 21a316f27095df2898e02dd0d9d9c92dd0033470 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 24 Mar 2026 09:23:23 +0100 Subject: [PATCH 298/554] spi: rockchip: fix controller deregistration commit 53e7a16070feb7d1d4d81a583eaac5e25048b9c3 upstream. Make sure to deregister the controller before freeing underlying resources like DMA channels during driver unbind. 
Fixes: 64e36824b32b ("spi/rockchip: add driver for Rockchip RK3xxx SoCs integrated SPI") Cc: stable@vger.kernel.org # 3.17 Cc: addy ke Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260324082326.901043-3-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rockchip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 1a6381de6f33d..45b1ad40a7efc 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -909,7 +909,7 @@ static int rockchip_spi_probe(struct platform_device *pdev) break; } - ret = devm_spi_register_controller(&pdev->dev, ctlr); + ret = spi_register_controller(ctlr); if (ret < 0) { dev_err(&pdev->dev, "Failed to register controller\n"); goto err_free_dma_rx; @@ -937,6 +937,8 @@ static void rockchip_spi_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); + spi_unregister_controller(ctlr); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); From ccee9833e33ba810a57f9a6e1f75657bf62df3ea Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 25 Apr 2026 18:38:28 +0900 Subject: [PATCH 299/554] ksmbd: rewrite stop_sessions() with restartable iteration commit c444139cb747bf6de1922b39900fdf02281490f4 upstream. stop_sessions() walks conn_list with hash_for_each() and, for every entry, drops conn_list_lock across the transport ->shutdown() call before re-acquiring the read lock to continue the loop. The hash walk relies on cross-iteration state (the current bucket and the hlist position), which is not preserved across unlock/relock: if another thread performs a list mutation during the unlocked window, the ongoing iteration becomes unreliable and can re-visit connections that have already been handled or skip connections that have not. 
The outer `if (!hash_empty(conn_list)) goto again;` retry masks the symptom in the common case but does not address the unsafe iteration itself. Reframe the loop so it never relies on iterator state across unlock/relock. Under conn_list_lock held for read, pick the first connection whose ->shutdown() has not yet been issued by this path, pin it by taking an extra reference, record that fact on the connection and mark it EXITING while still inside the locked walk, then drop the lock. Then call ->shutdown() outside the lock, drop the pin (freeing the connection if the handler already released its reference), and restart from the top. Use a new per-connection flag, conn->stop_called, as the "shutdown issued from stop_sessions()" marker rather than reusing the status state. ksmbd_conn_set_exiting() is also invoked by ksmbd_sessions_deregister() on sibling channels of a multichannel session without issuing a transport shutdown, so treating KSMBD_SESS_EXITING as "already handled here" would skip connections that still need shutdown() to wake their handler out of recv(), leaving the outer retry waiting indefinitely for the hash to drain. stop_sessions() is serialised by init_lock in ksmbd_conn_transport_destroy(), so writing stop_called under the read lock has no other writer. Set EXITING inside the locked walk so the selection, the stop_called marker, and the status transition all happen together, and guard against regressing a connection that has already advanced to KSMBD_SESS_RELEASING on its own (for example, if the handler exited its receive loop for an unrelated reason between teardown steps). When the pin drop is the last put, release the transport and pair ida_destroy(&target->async_ida) with the ida_init() done in ksmbd_conn_alloc(), so stop_sessions() retiring a connection on its own does not leak the xarray backing of the embedded async_ida. The outer retry with msleep() is kept to wait for handler threads to reach ksmbd_conn_free() and drain the hash. 
Observed with an instrumented build that logs one line per visit and widens the unlocked window before ->shutdown() by 200 ms, under five concurrent cifs mounts (nosharesock, one connection each): * Current code: the same connection address is revisited many times during a single stop_sessions() call and ->shutdown() is invoked well beyond the number of live connections before the hash finally drains. * Rewritten code: each live connection produces exactly one ->shutdown() call; the function returns as soon as the hash is empty. Functional teardown via `ksmbd.control --shutdown` with the same five mounts completes cleanly on the rewritten path. Performance is observably unchanged. Tearing down N concurrent nosharesock cifs connections with `ksmbd.control --shutdown` + `rmmod ksmbd` takes essentially the same wall time before and after the rewrite: N before after 10 4.93s 5.34s 30 7.34s 7.03s 50 7.31s 7.01s (3-run avg: 7.04s vs 7.25s) 100 6.98s 6.78s 200 6.77s 6.89s and the number of ->shutdown() calls equals the number of live connections on both paths when the race is not widened. The teardown is dominated by the msleep(100)-based outer retry waiting for handler threads to run ksmbd_conn_free(), not by the iteration itself; the restartable loop's worst-case O(N^2) visit cost is in the microseconds even at N=200 and sits far below the msleep(100) granularity. Applied alone on top of ksmbd-for-next-next, this patch does not introduce a new leak site. Under the same reproducer (10x concurrent-holders + ss -K + ksmbd.control --shutdown + rmmod), the tree still shows the pre-existing per-connection transport leak count that arises when the last refcount drop lands in one of ksmbd_conn_r_count_dec(), __free_opinfo() or session_fd_check() - all of which end with a bare kfree() today. kmemleak backtraces for the unreferenced objects point into the TCP accept path (sk_clone -> inet_csk_clone_lock, sock_alloc_inode) and none involve stop_sessions(). 
Plugging those bare-kfree sites is the responsibility of the follow-up patch. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Signed-off-by: DaeMyung Kang Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/connection.c | 46 +++++++++++++++++++++++++++++++------- fs/smb/server/connection.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index e810b339dd4e5..b4ef62b9e660c 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -476,24 +476,54 @@ int ksmbd_conn_transport_init(void) static void stop_sessions(void) { - struct ksmbd_conn *conn; + struct ksmbd_conn *conn, *target; struct ksmbd_transport *t; + bool any; int bkt; + /* + * Serialised via init_lock; no concurrent stop_sessions() can + * touch conn->stop_called, so writing it under the read lock is + * safe. + */ again: + target = NULL; + any = false; down_read(&conn_list_lock); hash_for_each(conn_list, bkt, conn, hlist) { - t = conn->transport; - ksmbd_conn_set_exiting(conn); - if (t->ops->shutdown) { - up_read(&conn_list_lock); + any = true; + if (conn->stop_called) + continue; + atomic_inc(&conn->refcnt); + conn->stop_called = true; + /* + * Mark the connection EXITING while still holding the + * read lock so the selection and the status transition + * happen together. Do not regress a connection that has + * already advanced to RELEASING on its own (e.g. the + * handler exited its receive loop for an unrelated + * reason). 
+ */ + if (READ_ONCE(conn->status) != KSMBD_SESS_RELEASING) + ksmbd_conn_set_exiting(conn); + target = conn; + break; + } + up_read(&conn_list_lock); + + if (target) { + t = target->transport; + if (t->ops->shutdown) t->ops->shutdown(t); - down_read(&conn_list_lock); + if (atomic_dec_and_test(&target->refcnt)) { + ida_destroy(&target->async_ida); + t->ops->free_transport(t); + kfree(target); } + goto again; } - up_read(&conn_list_lock); - if (!hash_empty(conn_list)) { + if (any) { msleep(100); goto again; } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 7f9bcd9817b54..50d1e5a54f33c 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -48,6 +48,7 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; + bool stop_called; union { __be32 inet_addr; #if IS_ENABLED(CONFIG_IPV6) From 738ec97b1855df6c08fe2369f798fa0b972e556b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 May 2026 08:58:57 +0200 Subject: [PATCH 300/554] KVM: x86: Fix shadow paging use-after-free due to unexpected GFN commit 0cb2af2ea66ad8ff195c156ea690f11216285bdf upstream. The shadow MMU computes GFNs for direct shadow pages using sp->gfn plus the SPTE index. This assumption breaks for shadow paging if the guest page tables are modified between VM entries (similar to commit aad885e77496, "KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE", 2026-03-27). The flow is as follows: - a PDE is installed for a 2MB mapping, and a page in that area is accessed. KVM creates a kvm_mmu_page consisting of 512 4KB pages; the kvm_mmu_page is marked by FNAME(fetch) as direct-mapped because the guest's mapping is a huge page (and thus contiguous). - the PDE mapping is changed from outside the guest. - the guest accesses another page in the same 2MB area. KVM installs a new leaf SPTE and rmap entry; the SPTE uses the "correct" GFN (i.e. 
based on the new mapping, as changed in the previous step) but that GFN is outside of the [sp->gfn, sp->gfn + 511] range; therefore the rmap entry cannot be found and removed when the kvm_mmu_page is zapped. - the memslot that covers the first 2MB mapping is deleted, and the kvm_mmu_page for the now-invalid GPA is zapped. However, rmap_remove() only looks at the [sp->gfn, sp->gfn + 511] range established in step 1, and fails to find the rmap entry that was recorded by step 3. - any operation that causes an rmap walk for the same page accessed by step 3 then walks a stale rmap and dereferences a freed kvm_mmu_page. This includes dirty logging or MMU notifier invalidations (e.g., from MADV_DONTNEED). The underlying issue is that KVM's walking of shadow PTEs assumes that if a SPTE is present when KVM wants to install a non-leaf SPTE, then the existing kvm_mmu_page must be for the correct gfn. Because the only way for the gfn to be wrong is if KVM messed up and failed to zap a SPTE... which shouldn't happen, but *actually* only happens in response to a guest write. That bug dates back literally forever, as even the first version of KVM assumes that the GFN matches and walks into the "wrong" shadow page. However, that was only an imprecision until 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages") came along. Fix it by checking for a target gfn mismatch and zapping the existing SPTE. That way the old SP and rmap entries are gone, KVM installs the rmap in the right location, and everyone is happy. 
Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages") Fixes: 6aa8b732ca01 ("kvm: userspace interface") Reported-by: Alexander Bulekov Reported-by: Fred Griffoul Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Link: https://patch.msgid.link/20260503201029.106481-1-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index dad7abb1112b7..0bd0cb8992c9f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -182,6 +182,8 @@ static struct kmem_cache *pte_list_desc_cache; struct kmem_cache *mmu_page_header_cache; static void mmu_spte_set(u64 *sptep, u64 spte); +static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, + u64 *spte, struct list_head *invalid_list); struct kvm_mmu_role_regs { const unsigned long cr0; @@ -1287,19 +1289,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) rmap_remove(kvm, sptep); } -static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) -{ - struct kvm_mmu_page *sp; - - sp = sptep_to_sp(sptep); - WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); - - drop_spte(kvm, sptep); - - if (flush) - kvm_flush_remote_tlbs_sptep(kvm, sptep); -} - /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. 
@@ -2466,7 +2455,8 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, { union kvm_mmu_page_role role; - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) && + spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn) return ERR_PTR(-EEXIST); role = kvm_mmu_child_role(sptep, direct, access); @@ -2544,13 +2534,16 @@ static void __link_shadow_page(struct kvm *kvm, BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); - /* - * If an SPTE is present already, it must be a leaf and therefore - * a large one. Drop it, and flush the TLB if needed, before - * installing sp. - */ - if (is_shadow_present_pte(*sptep)) - drop_large_spte(kvm, sptep, flush); + if (is_shadow_present_pte(*sptep)) { + struct kvm_mmu_page *parent_sp; + LIST_HEAD(invalid_list); + + parent_sp = sptep_to_sp(sptep); + WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K); + + mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list); + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true); + } spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); From 6200f41d6fcf2ac7e24866431e381cbc914560e4 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Tue, 5 May 2026 18:43:03 -0700 Subject: [PATCH 301/554] ceph: fix num_ops off-by-one when crypto allocation fails commit a0d9555bf9eaeba34fe6b6bb86f442fe08ba3842 upstream. move_dirty_folio_in_page_array() may fail if the file is encrypted, the dirty folio is not the first in the batch, and it fails to allocate a bounce buffer to hold the ciphertext. When that happens, ceph_process_folio_batch() simply redirties the folio and flushes the current batch -- it can retry that folio in a future batch. 
However, if this failed folio is not contiguous with the last folio that did make it into the batch, then ceph_process_folio_batch() has already incremented `ceph_wbc->num_ops`; because it doesn't follow through and add the discontiguous folio to the array, ceph_submit_write() -- which expects that `ceph_wbc->num_ops` accurately reflects the number of contiguous ranges (and therefore the required number of "write extent" ops) in the writeback -- will panic the kernel: BUG_ON(ceph_wbc->op_idx + 1 != req->r_num_ops); This issue can be reproduced on affected kernels by writing to fscrypt-enabled CephFS file(s) with a 4KiB-written/4KiB-skipped/repeat pattern (total filesize should not matter) and gradually increasing the system's memory pressure until a bounce buffer allocation fails. Fix this crash by decrementing `ceph_wbc->num_ops` back to the correct value when move_dirty_folio_in_page_array() fails, but the folio already started counting a new (i.e. still-empty) extent. The defect corrected by this patch has existed since 2022 (see first `Fixes:`), but another bug blocked multi-folio encrypted writeback until recently (see second `Fixes:`). The second commit made it into 6.18.16, 6.19.6, and 7.0-rc1, unmasking the panic in those versions. This patch therefore fixes a regression (panic) introduced by cac190c7674f. 
Cc: stable@vger.kernel.org Fixes: d55207717ded ("ceph: add encryption support to writepage and writepages") Fixes: cac190c7674f ("ceph: fix write storm on fscrypted files") Signed-off-by: Sam Edwards Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/addr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 390f122feeaaf..3af6795cb3c15 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1373,6 +1373,10 @@ int ceph_process_folio_batch(struct address_space *mapping, rc = move_dirty_folio_in_page_array(mapping, wbc, ceph_wbc, folio); if (rc) { + /* Did we just begin a new contiguous op? Nevermind! */ + if (ceph_wbc->len == 0) + ceph_wbc->num_ops--; + rc = 0; folio_redirty_for_writepage(wbc, folio); folio_unlock(folio); From 0d00b9015069712944934bab09eaa6c542143049 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Wed, 15 Apr 2026 10:24:50 +0800 Subject: [PATCH 302/554] flow_dissector: do not dissect PPPoE PFC frames [ Upstream commit d6c19b31a3c1d519fabdcf0aa239e6b6109b9473 ] RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT RECOMMENDED for PPPoE. In practice, pppd does not support negotiating PFC for PPPoE sessions, and the flow dissector driver has assumed an uncompressed frame until the blamed commit. During the review process of that commit [1], support for PFC is suggested. However, having a compressed (1-byte) protocol field means the subsequent PPP payload is shifted by one byte, causing 4-byte misalignment for the network header and an unaligned access exception on some architectures. 
The exception can be reproduced by sending a PPPoE PFC frame to an ethernet interface of a MIPS board, with RPS enabled, even if no PPPoE session is active on that interface: $ 0 : 00000000 80c40000 00000000 85144817 $ 4 : 00000008 00000100 80a75758 81dc9bb8 $ 8 : 00000010 8087ae2c 0000003d 00000000 $12 : 000000e0 00000039 00000000 00000000 $16 : 85043240 80a75758 81dc9bb8 00006488 $20 : 0000002f 00000007 85144810 80a70000 $24 : 81d1bda0 00000000 $28 : 81dc8000 81dc9aa8 00000000 805ead08 Hi : 00009d51 Lo : 2163358a epc : 805e91f0 __skb_flow_dissect+0x1b0/0x1b50 ra : 805ead08 __skb_get_hash_net+0x74/0x12c Status: 11000403 KERNEL EXL IE Cause : 40800010 (ExcCode 04) BadVA : 85144817 PrId : 0001992f (MIPS 1004Kc) Call Trace: [<805e91f0>] __skb_flow_dissect+0x1b0/0x1b50 [<805ead08>] __skb_get_hash_net+0x74/0x12c [<805ef330>] get_rps_cpu+0x1b8/0x3fc [<805fca70>] netif_receive_skb_list_internal+0x324/0x364 [<805fd120>] napi_complete_done+0x68/0x2a4 [<8058de5c>] mtk_napi_rx+0x228/0xfec [<805fd398>] __napi_poll+0x3c/0x1c4 [<805fd754>] napi_threaded_poll_loop+0x234/0x29c [<805fd848>] napi_threaded_poll+0x8c/0xb0 [<80053544>] kthread+0x104/0x12c [<80002bd8>] ret_from_kernel_thread+0x14/0x1c Code: 02d51821 1060045b 00000000 <8c640000> 3084000f 2c820005 144001a2 00042080 8e220000 To reduce the attack surface and maintain performance, do not process PPPoE PFC frames. 
[1] https://lore.kernel.org/r/20220630231016.GA392@debian.home Fixes: 46126db9c861 ("flow_dissector: Add PPPoE dissectors") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260415022456.141758-1-qingfang.deng@linux.dev Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/flow_dissector.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1b61bb25ba0e5..2a98f5fa74eb0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1374,16 +1374,13 @@ bool __skb_flow_dissect(const struct net *net, break; } - /* least significant bit of the most significant octet - * indicates if protocol field was compressed + /* PFC (compressed 1-byte protocol) frames are not processed. + * A compressed protocol field has the least significant bit of + * the most significant octet set, which will fail the following + * ppp_proto_is_valid(), returning FLOW_DISSECT_RET_OUT_BAD. */ ppp_proto = ntohs(hdr->proto); - if (ppp_proto & 0x0100) { - ppp_proto = ppp_proto >> 8; - nhoff += PPPOE_SES_HLEN - 1; - } else { - nhoff += PPPOE_SES_HLEN; - } + nhoff += PPPOE_SES_HLEN; if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); From ff9e4d5e9bbaeadea98e6438ed72a636219f7aa8 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Wed, 6 May 2026 13:20:41 +0200 Subject: [PATCH 303/554] mptcp: sync the msk->sndbuf at accept() time commit fcf04b14334641f4b0b8647824480935e9416d52 upstream. On passive MPTCP connections, the msk sndbuf is not updated correctly. The root cause is an order issue in the accept path: - tcp_check_req() -> subflow_syn_recv_sock() -> mptcp_sk_clone_init() calls __mptcp_propagate_sndbuf() to copy the ssk sndbuf into msk - Later, tcp_child_process() -> tcp_init_transfer() -> tcp_sndbuf_expand() grows the ssk sndbuf. 
So __mptcp_propagate_sndbuf() runs before the ssk sndbuf has been expanded and the msk ends up with a much smaller sndbuf than the subflow: MPTCP: msk->sndbuf:20480, msk->first->sndbuf:2626560 Fix this by moving the __mptcp_propagate_sndbuf() call from mptcp_sk_clone_init() -- the ssk sndbuf is not yet finalized there -- to mptcp_stream_accept() at accept() time, when the ssk sndbuf has been fully expanded by tcp_sndbuf_expand(). Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/602 Signed-off-by: Gang Yan Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260420-net-mptcp-sync-sndbuf-accept-v1-1-e3523e3aeb44@kernel.org Signed-off-by: Paolo Abeni [ No conflicts, but move __mptcp_propagate_sndbuf() above the for-loop (mptcp_for_each_subflow()) present in this version, which will modify 'subflow' used by __mptcp_propagate_sndbuf() in this new patch. ] Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 09e1a93b7daab..c805d36fe50d5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3428,7 +3428,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(nsk, ssk); - __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); msk->rcvq_space.time = mptcp_stamp(); @@ -4027,6 +4026,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, msk = mptcp_sk(newsk); msk->in_accept_queue = 0; + __mptcp_propagate_sndbuf(newsk, mptcp_subflow_tcp_sock(subflow)); + /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. 
*/ From 9dea2aeabc86c2c441d2c89a5b6ebbfc96e56bfb Mon Sep 17 00:00:00 2001 From: Yi Kuo Date: Fri, 8 May 2026 10:15:47 +0200 Subject: [PATCH 304/554] smb: client/smbdirect: fix MR registration for coalesced SG lists commit 9900b9fee5a0e0f72d7c744b37c7c851d5785ac6 upstream. The stable backport to < 7.1 patches a different file. Also the Fixes tag below is adjusted for the old code path. ib_dma_map_sg() modifies the provided scatterlist and returns the number of mapped entries, which can be fewer than the requested mr->sgt.nents if the DMA controller coalesces contiguous memory segments. Passing the original, uncoalesced count to ib_map_mr_sg() causes memory registration failures if coalescing actually occurs. Capture the actual mapped count returned by ib_dma_map_sg() and pass it to ib_map_mr_sg() to ensure correct MR registration. Also update the ib_dma_map_sg() error logging to drop the error pointer formatting, since the return value is an integer count rather than an error code. Ensure a proper error code (-EIO) is assigned when DMA mapping or MR registration fails. 
Fixes: c7398583340a ("CIFS: SMBD: Implement RDMA memory registration") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221408 Reviewed-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Yi Kuo Signed-off-by: Steve French Cc: stable@vger.kernel.org Signed-off-by: Stefan Metzmacher Signed-off-by: Sasha Levin --- fs/smb/client/smbdirect.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index ff44a2dc49938..e2b20219ba2c7 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2895,7 +2895,7 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_mr_io *mr; - int rc, num_pages; + int rc, num_pages, num_mapped; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -2923,18 +2923,21 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, num_pages, iov_iter_count(iter), sp->max_frmr_depth); smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); - rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - if (!rc) { - log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", - num_pages, mr->dir, rc); + num_mapped = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + if (!num_mapped) { + log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x num_mapped=%x\n", + num_pages, mr->dir, num_mapped); + rc = -EIO; goto dma_map_error; } - rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); - if (rc != mr->sgt.nents) { + rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, num_mapped, NULL, PAGE_SIZE); + if (rc != num_mapped) { log_rdma_mr(ERR, - "ib_map_mr_sg failed rc = %d nents = %x\n", - rc, mr->sgt.nents); + "ib_map_mr_sg failed rc = %d num_mapped = %x\n", + rc, num_mapped); + if (rc >= 0) + rc = -EIO; goto map_mr_error; } From 
edd446ee7cd3d02cac246168063d5b3e9ea68460 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sun, 22 Mar 2026 11:46:08 -0700 Subject: [PATCH 305/554] net: af_key: zero aligned sockaddr tail in PF_KEY exports [ Upstream commit 426c355742f02cf743b347d9d7dbdc1bfbfa31ef ] PF_KEY export paths use `pfkey_sockaddr_size()` when reserving sockaddr payload space, so IPv6 addresses occupy 32 bytes on the wire. However, `pfkey_sockaddr_fill()` initializes only the first 28 bytes of `struct sockaddr_in6`, leaving the final 4 aligned bytes uninitialized. Not every PF_KEY message is affected. The state and policy dump builders already zero the whole message buffer before filling the sockaddr payloads. Keep the fix to the export paths that still append aligned sockaddr payloads with plain `skb_put()`: - `SADB_ACQUIRE` - `SADB_X_NAT_T_NEW_MAPPING` - `SADB_X_MIGRATE` Fix those paths by clearing only the aligned sockaddr tail after `pfkey_sockaddr_fill()`. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: 08de61beab8a ("[PFKEYV2]: Extension for dynamic update of endpoint address(es)") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Xiao Liu Signed-off-by: Zhengchuan Liang Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index ceaa82bc78acc..e01939ab81039 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -757,6 +757,22 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port return 0; } +static unsigned int pfkey_sockaddr_fill_zero_tail(const xfrm_address_t *xaddr, + __be16 port, + struct sockaddr *sa, + unsigned short family) +{ + unsigned int prefixlen; + int sockaddr_len = pfkey_sockaddr_len(family); + int sockaddr_size = pfkey_sockaddr_size(family); + + prefixlen = 
pfkey_sockaddr_fill(xaddr, port, sa, family); + if (sockaddr_size > sockaddr_len) + memset((u8 *)sa + sockaddr_len, 0, sockaddr_size - sockaddr_len); + + return prefixlen; +} + static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { @@ -3206,9 +3222,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->props.saddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->props.saddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3221,9 +3237,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->id.daddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->id.daddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3421,9 +3437,9 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->props.saddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->props.saddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3443,9 +3459,9 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(ipaddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(ipaddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if 
(!addr->sadb_address_prefixlen) BUG(); @@ -3474,15 +3490,15 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; - pfkey_sockaddr_fill(&sel->saddr, 0, - (struct sockaddr *)(addr + 1), - sel->family); + pfkey_sockaddr_fill_zero_tail(&sel->saddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; - pfkey_sockaddr_fill(&sel->daddr, 0, - (struct sockaddr *)(addr + 1), - sel->family); + pfkey_sockaddr_fill_zero_tail(&sel->daddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; default: return -EINVAL; From 53e821fd9baec14bf19b462b5e47359b0af52390 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Mar 2026 12:40:52 +0100 Subject: [PATCH 306/554] KVM: SVM: check validity of VMCB controls when returning from SMM commit be5fa8737d42c5ba16d2ea72c23681f8abbb07e8 upstream. The VMCB12 is stored in guest memory and can be mangled while in SMM; it is then reloaded by svm_leave_smm(), but it is not checked again for validity. Move the cached vmcb12 control and save consistency checks out of svm_set_nested_state() and into a helper, and reuse it in svm_leave_smm(). 
Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 12 ++++++++++-- arch/x86/kvm/svm/svm.c | 4 ++++ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 17c6ff201fa1d..66b8955a1658e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -484,6 +484,15 @@ void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, __nested_copy_vmcb_save_to_cache(&svm->nested.save, save); } +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu) +{ + if (!nested_vmcb_check_save(vcpu) || + !nested_vmcb_check_controls(vcpu)) + return -EINVAL; + + return 0; +} + /* * Synchronize fields that are written by the processor, so that * they can be copied back into the vmcb12. @@ -990,8 +999,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - if (!nested_vmcb_check_save(vcpu) || - !nested_vmcb_check_controls(vcpu)) { + if (nested_svm_check_cached_vmcb12(vcpu) < 0) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7795e4777da7e..fc22d106a3141 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4885,6 +4885,10 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); + + if (nested_svm_check_cached_vmcb12(vcpu) < 0) + goto unmap_save; + ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); if (ret) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 54e30b08801fc..990b4caf8b6df 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -782,6 +782,7 @@ static inline int 
nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct kvm_vcpu *vcpu); +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); From 33fe95663d105df6398a0cb71c0114e8ae3b5346 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Thu, 13 Nov 2025 11:27:21 +0000 Subject: [PATCH 307/554] net: stmmac: Disable EEE RX clock stop when VLAN is enabled commit c171e679ee66d7c0e2b58db9531af96797a76bca upstream. On the Renesas RZ/V2H EVK platform, where the stmmac MAC is connected to a Microchip KSZ9131RNXI PHY, creating or deleting VLAN interfaces may fail with timeouts: # ip link add link end1 name end1.5 type vlan id 5 15c40000.ethernet end1: Timeout accessing MAC_VLAN_Tag_Filter RTNETLINK answers: Device or resource busy Disabling EEE at runtime avoids the problem: # ethtool --set-eee end1 eee off # ip link add link end1 name end1.5 type vlan id 5 # ip link del end1.5 The stmmac hardware requires the receive clock to be running when writing certain registers, such as those used for MAC address configuration or VLAN filtering. However, by default the driver enables Energy Efficient Ethernet (EEE) and allows the PHY to stop the receive clock when the link is idle. As a result, the RX clock might be stopped when attempting to access these registers, leading to timeouts and other issues. Commit dd557266cf5fb ("net: stmmac: block PHY RXC clock-stop") addressed this issue for most register accesses by wrapping them in phylink_rx_clk_stop_block()/phylink_rx_clk_stop_unblock() calls. However, VLAN add/delete operations may be invoked with bottom halves disabled, where sleeping is not allowed, so using these helpers is not possible. 
Therefore, to fix this, disable the RX clock stop feature in the phylink configuration if VLAN features are set. This ensures the RX clock remains active and register accesses succeed during VLAN operations. Signed-off-by: Ovidiu Panait Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20251113112721.70500-3-ovidiu.panait.rb@renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f8df0609e0e69..bd9aaab7f1712 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1204,7 +1204,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) /* Stmmac always requires an RX clock for hardware initialization */ config->mac_requires_rxc = true; - if (!(priv->plat->flags & STMMAC_FLAG_RX_CLK_RUNS_IN_LPI)) + /* Disable EEE RX clock stop to ensure VLAN register access works + * correctly. + */ + if (!(priv->plat->flags & STMMAC_FLAG_RX_CLK_RUNS_IN_LPI) && + !(priv->dev->features & NETIF_F_VLAN_FEATURES)) config->eee_rx_clk_stop_enable = true; /* Set the default transmit clock stop bit based on the platform glue */ From 8d09618840b99ef00154d3e731ce9b11e096196d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 30 Apr 2026 11:29:55 -0400 Subject: [PATCH 308/554] net/sched: sch_red: Replace direct dequeue call with peek and qdisc_dequeue_peeked commit 458d5615272d3de535748342eb68ca492343048c upstream. When red qdisc has children (eg qfq qdisc) whose peek() callback is qdisc_peek_dequeued(), we could get a kernel panic. When the parent of such qdiscs (eg illustrated in patch #3 as tbf) wants to retrieve an skb from its child (red in this case), it will do the following: 1a. 
do a peek() - and when sensing there's an skb the child can offer, then - the child in this case(red) calls its child's (qfq) peek. qfq does the right thing and will return the gso_skb queue packet. Note: if there wasnt a gso_skb entry then qfq will store it there. 1b. invoke a dequeue() on the child (red). And herein lies the problem. - red will call the child's dequeue() which will essentially just try to grab something of qfq's queue. [ 78.667668][ T363] KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f] [ 78.667927][ T363] CPU: 1 UID: 0 PID: 363 Comm: ping Not tainted 7.1.0-rc1-00033-g46f74a3f7d57-dirty #790 PREEMPT(full) [ 78.668263][ T363] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 78.668486][ T363] RIP: 0010:qfq_dequeue+0x446/0xc90 [sch_qfq] [ 78.668718][ T363] Code: 54 c0 e8 dd 90 00 f1 48 c7 c7 e0 03 54 c0 48 89 de e8 ce 90 00 f1 48 8d 7b 48 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a 48 c1 ea 03 <80> 3c 02 00 74 05 e8 ef a1 e1 f1 48 8b 7b 48 48 8d 54 24 58 48 8d [ 78.669312][ T363] RSP: 0018:ffff88810de573e0 EFLAGS: 00010216 [ 78.669533][ T363] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 78.669790][ T363] RDX: 0000000000000009 RSI: 0000000000000004 RDI: 0000000000000048 [ 78.670044][ T363] RBP: ffff888110dc4000 R08: ffffffffb1b0885a R09: fffffbfff6ba9078 [ 78.670297][ T363] R10: 0000000000000003 R11: ffff888110e31c80 R12: 0000001880000000 [ 78.670560][ T363] R13: ffff888110dc4150 R14: ffff888110dc42b8 R15: 0000000000000200 [ 78.670814][ T363] FS: 00007f66a8f09c40(0000) GS:ffff888163428000(0000) knlGS:0000000000000000 [ 78.671110][ T363] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 78.671324][ T363] CR2: 000055db4c6a30a8 CR3: 000000010da67000 CR4: 0000000000750ef0 [ 78.671585][ T363] PKRU: 55555554 [ 78.671713][ T363] Call Trace: [ 78.671843][ T363] [ 78.671936][ T363] ? __pfx_qfq_dequeue+0x10/0x10 [sch_qfq] [ 78.672148][ T363] ? __pfx__printk+0x10/0x10 [ 78.672322][ T363] ? 
srso_alias_return_thunk+0x5/0xfbef5 [ 78.672496][ T363] ? lockdep_hardirqs_on_prepare+0xa8/0x1a0 [ 78.672706][ T363] ? srso_alias_return_thunk+0x5/0xfbef5 [ 78.672875][ T363] ? trace_hardirqs_on+0x19/0x1a0 [ 78.673047][ T363] red_dequeue+0x65/0x270 [sch_red] [ 78.673217][ T363] ? srso_alias_return_thunk+0x5/0xfbef5 [ 78.673385][ T363] tbf_dequeue.cold+0xb0/0x70c [sch_tbf] [ 78.673566][ T363] __qdisc_run+0x169/0x1900 The right thing to do in #1b is to grab the skb off gso_skb queue. This patchset fixes that issue by changing #1b to use qdisc_dequeue_peeked() method instead. Fixes: 77be155cba4e ("pkt_sched: Add peek emulation for non-work-conserving qdiscs.") Reported-by: Manas Reported-by: Rakshit Awasthi Signed-off-by: Jamal Hadi Salim Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260430152957.194015-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_red.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 479c42d11083f..68ee41ce78c50 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -155,7 +155,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; - skb = child->dequeue(child); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); From 6f49f94f3b11fe8bff1bf2a054143789e76aaf17 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 11 May 2026 08:55:11 -0700 Subject: [PATCH 309/554] exit: prevent preemption of oopsing TASK_DEAD task commit c1fa0bb633e4a6b11e83ffc57fa5abe8ebb87891 upstream. When an already-exiting task oopses, make_task_dead() currently calls do_task_dead() with preemption enabled. That is forbidden: do_task_dead() calls __schedule(), which has a comment saying "WARNING: must be called with preemption disabled!". 
If an oopsing task is preempted in do_task_dead(), between becoming TASK_DEAD and entering the scheduler explicitly, bad things happen: finish_task_switch() assumes that once the scheduler has switched away from a TASK_DEAD task, the task can never run again and its stack is no longer needed; but that assumption apparently doesn't hold if the dead task was preempted (the SM_PREEMPT case). This means that the scheduler ends up repeatedly dropping references on the dead task's stack, which can lead to use-after-free or double-free of the entire task stack; in other words, two tasks can end up running on the same stack, resulting in various kinds of memory corruption. (This does not just affect "recursively oopsing" tasks; it is enough to oops once during task exit, for example in a file_operations::release handler) Fixes: 7f80a2fd7db9 ("exit: Stop poorly open coding do_task_dead in make_task_dead") Cc: stable@kernel.org Signed-off-by: Jann Horn Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/exit.c b/kernel/exit.c index c8c3ff935a84b..c97db291c5d1c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1069,6 +1069,7 @@ void __noreturn make_task_dead(int signr) futex_exit_recursive(tsk); tsk->exit_state = EXIT_DEAD; refcount_inc(&tsk->rcu_users); + preempt_disable(); do_task_dead(); } From 5860ab3ddeaab0c84313f5d5f106235c83ea012d Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Thu, 27 Nov 2025 15:49:11 +0800 Subject: [PATCH 310/554] wifi: mt76: mt7925: fix AMPDU state handling in mt7925_tx_check_aggr commit bb8e38fcdbf7290d7f0cd572d2d8fdb2b641b492 upstream. Previously, the AMPDU state bit for a given TID was set before attempting to start a BA session, which could result in the AMPDU state being marked active even if ieee80211_start_tx_ba_session() failed. 
This patch changes the logic to only set the AMPDU state bit after successfully starting a BA session, ensuring proper synchronization between AMPDU state and BA session status. This fixes potential issues with aggregation state tracking and improves compatibility with mac80211 BA session management. Fixes: 44eb173bdd4f ("wifi: mt76: mt7925: add link handling in mt7925_txwi_free") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Reviewed-by: Sean Wang Link: https://patch.msgid.link/d5960fbced0beaf33c30203f7f8fb91d0899c87b.1764228973.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index e880f3820a1ad..a048ab1feef05 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -881,8 +881,10 @@ static void mt7925_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb, else mlink = &msta->deflink; - if (!test_and_set_bit(tid, &mlink->wcid.ampdu_state)) - ieee80211_start_tx_ba_session(sta, tid, 0); + if (!test_and_set_bit(tid, &mlink->wcid.ampdu_state)) { + if (ieee80211_start_tx_ba_session(sta, tid, 0)) + clear_bit(tid, &mlink->wcid.ampdu_state); + } } static bool From 6fc7c8b414ce1f1fe9dde805c6ca3f978d05fab6 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 8 Sep 2025 15:25:26 +0800 Subject: [PATCH 311/554] wifi: mt76: mt7925: fix incorrect length field in txpower command commit ccb186326bb6b7f20d77982f855568e7087ad0d7 upstream. Set `tx_power_tlv->len` to `msg_len` instead of `sizeof(*tx_power_tlv)` to ensure the correct message length is sent to firmware. 
Cc: stable@vger.kernel.org Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250908072526.1833938-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index a411838147ec5..e44c9ba168878 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -3673,7 +3673,7 @@ mt7925_mcu_rate_txpower_band(struct mt76_phy *phy, memcpy(tx_power_tlv->alpha2, dev->alpha2, sizeof(dev->alpha2)); tx_power_tlv->n_chan = num_ch; tx_power_tlv->tag = cpu_to_le16(0x1); - tx_power_tlv->len = cpu_to_le16(sizeof(*tx_power_tlv)); + tx_power_tlv->len = cpu_to_le16(msg_len); switch (band) { case NL80211_BAND_2GHZ: From 0aa63d33742b805d1a218d18d12b983cce4b2f7b Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Thu, 9 Oct 2025 10:01:58 +0800 Subject: [PATCH 312/554] wifi: mt76: mt7921: fix a potential clc buffer length underflow commit 5373f8b19e568b5c217832b9bbef165bd2b2df14 upstream. The buf_len is used to limit the iterations for retrieving the country power setting and may underflow under certain conditions due to changes in the power table in CLC. This underflow leads to an almost infinite loop or an invalid power setting resulting in driver initialization failure. 
Cc: stable@vger.kernel.org Fixes: fa6ad88e023d ("wifi: mt76: mt7921: fix country count limitation for CLC") Signed-off-by: Leon Yen Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20251009020158.1923429-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index edc1df3c071e5..663b245f2891f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1353,6 +1353,9 @@ int __mt7921_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, u16 len = le16_to_cpu(rule->len); u16 offset = len + sizeof(*rule); + if (buf_len < offset) + break; + pos += offset; buf_len -= offset; if (rule->alpha2[0] != alpha2[0] || From 6d55948a62aba02730330367424de0ed4b242d61 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Fri, 23 Jan 2026 10:16:25 +0800 Subject: [PATCH 313/554] wifi: mt76: mt7921: fix ROC abort flow interruption in mt7921_roc_work commit fdfa39f9f4fbae532b162da913a67b2410caf38f upstream. The mt7921_set_roc API may be executed concurrently with mt7921_roc_work, specifically between the following code paths: - The check and clear of MT76_STATE_ROC in mt7921_roc_work: if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) return; - The execution of ieee80211_iterate_active_interfaces. This race condition can interrupt the ROC abort flow, resulting in the ROC process failing to abort as expected. To address this defect, the modification of MT76_STATE_ROC is now protected by mt792x_mutex_acquire(phy->dev). This ensures that changes to the ROC state are properly synchronized, preventing race conditions and ensuring the ROC abort flow is not interrupted. 
Fixes: 034ae28b56f1 ("wifi: mt76: mt7921: introduce remain_on_channel support") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Reviewed-by: Sean Wang Link: https://patch.msgid.link/2568ece8b557e5dda79391414c834ef3233049b6.1769133724.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 67383c41a3199..5e676916593d3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -387,10 +387,11 @@ void mt7921_roc_work(struct work_struct *work) phy = (struct mt792x_phy *)container_of(work, struct mt792x_phy, roc_work); - if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) - return; - mt792x_mutex_acquire(phy->dev); + if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) { + mt792x_mutex_release(phy->dev); + return; + } ieee80211_iterate_active_interfaces(phy->mt76->hw, IEEE80211_IFACE_ITER_RESUME_ALL, mt7921_roc_iter, phy); From 9d1bc155802943e92c57a5fb923d23edfbf0b525 Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Fri, 17 Apr 2026 11:11:45 +0000 Subject: [PATCH 314/554] wifi: b43legacy: enforce bounds check on firmware key index in RX path commit a035766f970bde2d4298346a31a80685be5c0205 upstream. Same fix as b43: the firmware-controlled key index in b43legacy_rx() can exceed dev->max_nr_keys. The existing B43legacy_WARN_ON is non-enforcing in production builds, allowing an out-of-bounds read of dev->key[]. Make the check enforcing by dropping the frame for invalid indices. 
Fixes: 75388acd0cd8 ("[B43LEGACY]: add mac80211-based driver for legacy BCM43xx devices") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Link: https://patch.msgid.link/20260417111145.2694196-2-tristmd@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43legacy/xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c index efd63f4ce74f2..ee199d4eaf039 100644 --- a/drivers/net/wireless/broadcom/b43legacy/xmit.c +++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c @@ -476,7 +476,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev, * key index, but the ucode passed it slightly different. */ keyidx = b43legacy_kidx_to_raw(dev, keyidx); - B43legacy_WARN_ON(keyidx >= dev->max_nr_keys); + if (B43legacy_WARN_ON(keyidx >= dev->max_nr_keys)) + goto drop; if (dev->key[keyidx].algorithm != B43legacy_SEC_ALGO_NONE) { /* Remove PROTECTED flag to mark it as decrypted. */ From e131562d6f2b958148c35c98831b007f47f0e3d3 Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 24 Apr 2026 21:14:36 +0800 Subject: [PATCH 315/554] wifi: mac80211: drop stray 'static' from fast-RX rx_result commit 7a5b81e0c87a075afd572f659d8eb68c9c4cd2ba upstream. ieee80211_invoke_fast_rx() is documented as safe for parallel RX, but its per-invocation rx_result is declared static. Concurrent callers then share one instance and can overwrite each other's result between ieee80211_rx_mesh_data() and the switch on res. That can make a packet that was queued or consumed by ieee80211_rx_mesh_data() fall through into ieee80211_rx_8023(), or make a packet that should continue return as queued. Make res an automatic variable so each invocation keeps its own result. 
Fixes: 3468e1e0c639 ("wifi: mac80211: add mesh fast-rx support") Cc: stable@vger.kernel.org Signed-off-by: Catherine Link: https://patch.msgid.link/20260424131435.83212-2-enderaoelyther@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5091a3c15b0b4..0a986607e6592 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4824,7 +4824,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - static ieee80211_rx_result res; + ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; From 4f9a4ae8d2c198f01611ea376034c326ef43ab56 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Thu, 23 Apr 2026 02:38:46 +0900 Subject: [PATCH 316/554] wifi: rsi: fix kthread lifetime race between self-exit and external-stop commit db57a1aa54ff68669781976e4edb045e09e2b65b upstream. RSI driver use both self-exit(kthread_complete_and_exit) and external-stop (kthread_stop) when killing a kthread. Generally, kthread_stop() is called first, and in this case, no particular issues occur. However, in rare instances where kthread_complete_and_exit() is called first and then kthread_stop() is called, a UAF occurs because the kthread object, which has already exited and been freed, is accessed again. Therefore, to prevent this with minimal modification, you must remove kthread_stop() and change the code to wait until the self-exit operation is completed. 
Cc: Reported-by: syzbot+5de83f57cd8531f55596@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69e5d03b.a00a0220.1bd0ca.0064.GAE@google.com/ Fixes: 4c62764d0fc2 ("rsi: improve kernel thread handling to fix kernel panic") Signed-off-by: Jeongjun Park Link: https://patch.msgid.link/20260422173846.37640-1-aha310510@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_common.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 7aa5124575cfe..c40f8101febcb 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -70,12 +70,11 @@ static inline int rsi_create_kthread(struct rsi_common *common, return 0; } -static inline int rsi_kill_thread(struct rsi_thread *handle) +static inline void rsi_kill_thread(struct rsi_thread *handle) { atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - - return kthread_stop(handle->task); + wait_for_completion(&handle->completion); } void rsi_mac80211_detach(struct rsi_hw *hw); From 7577a4b8a10fab45a6ee2045ea038a5adadbb585 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 5 May 2026 15:15:40 +0200 Subject: [PATCH 317/554] wifi: mac80211: use safe list iteration in radar detect work commit ac8eb3e18f41e2cc8492cc1d358bcb786c850270 upstream. The call to ieee80211_dfs_cac_cancel can cause the iterated chanctx to be freed and removed from the list. Guard against this to avoid a slab-use-after-free error. 
Cc: stable@vger.kernel.org Fixes: bca8bc0399ac ("wifi: mac80211: handle ieee80211_radar_detected() for MLO") Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20260505151539.236d63a1b736.I35dbb9e96a2d4a480be208770fdd99ba3b817b79@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c9931537f9d2d..62e1a37849d04 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3565,11 +3565,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); struct cfg80211_chan_def chandef; - struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *ctx, *tmp; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { + list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; From e9f1081bc775146156def0dbc821b92f35d56afb Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 9 Dec 2025 11:04:59 +0100 Subject: [PATCH 318/554] wifi: ath5k: do not access array OOB commit d748603f12baff112caa3ab7d39f50100f010dbd upstream. Vincent reports: > The ath5k driver seems to do an array-index-out-of-bounds access as > shown by the UBSAN kernel message: > UBSAN: array-index-out-of-bounds in drivers/net/wireless/ath/ath5k/base.c:1741:20 > index 4 is out of range for type 'ieee80211_tx_rate [4]' > ... > Call Trace: > > dump_stack_lvl+0x5d/0x80 > ubsan_epilogue+0x5/0x2b > __ubsan_handle_out_of_bounds.cold+0x46/0x4b > ath5k_tasklet_tx+0x4e0/0x560 [ath5k] > tasklet_action_common+0xb5/0x1c0 It is real. 
'ts->ts_final_idx' can be 3 on 5212, so: info->status.rates[ts->ts_final_idx + 1].idx = -1; with the array defined as: struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; while the size is: #define IEEE80211_TX_MAX_RATES 4 is indeed bogus. Set this 'idx = -1' sentinel only if the array index is less than the array size. As mac80211 will not look at rates beyond the size (IEEE80211_TX_MAX_RATES). Note: The effect of the OOB write is negligible. It just overwrites the next member of info->status, i.e. ack_signal. Signed-off-by: Jiri Slaby (SUSE) Reported-by: Vincent Danjean Link: https://lore.kernel.org/all/aQYUkIaT87ccDCin@eldamar.lan Closes: https://bugs.debian.org/1119093 Fixes: 6d7b97b23e11 ("ath5k: fix tx status reporting issues") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251209100459.2253198-1-jirislaby@kernel.org Signed-off-by: Jeff Johnson Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath5k/base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 4d88b02ffa795..917e1b087924f 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1738,7 +1738,8 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb, } info->status.rates[ts->ts_final_idx].count = ts->ts_final_retry; - info->status.rates[ts->ts_final_idx + 1].idx = -1; + if (ts->ts_final_idx + 1 < IEEE80211_TX_MAX_RATES) + info->status.rates[ts->ts_final_idx + 1].idx = -1; if (unlikely(ts->ts_status)) { ah->stats.ack_fail++; From 9e28654f79f443bca9b29ff3ae7cf18abfba58a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 May 2026 15:15:34 +0200 Subject: [PATCH 319/554] wifi: mac80211: remove station if connection prep fails commit 283fc9e44ff5b5ac967439b4951b80bd4299f4e4 upstream. If connection preparation fails for MLO connections, then the interface is completely reset to non-MLD. 
In this case, we must not keep the station since it's related to the link of the vif being removed. Delete an existing station. Any "new_sta" is already being removed, so that doesn't need changes. This fixes a use-after-free/double-free in debugfs if that's enabled, because a vif going from MLD (and to MLD, but that's not relevant here) recreates its entire debugfs. Cc: stable@vger.kernel.org Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20260505151533.c4e52deb06ad.Iafe56cec7de8512626169496b134bce3a6c17010@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f119149bcc1c1..deef15c074c83 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8926,7 +8926,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_link_data *link; - bool have_sta = false; + struct sta_info *have_sta = NULL; bool mlo; int err; u16 new_links; @@ -8945,11 +8945,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = false; } - if (assoc) { - rcu_read_lock(); + if (assoc) have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } if (mlo && !have_sta && WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) @@ -9108,6 +9105,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_release_chan: ieee80211_link_release_channel(link); out_err: + if (mlo && have_sta) + WARN_ON(__sta_info_destroy(have_sta)); ieee80211_vif_set_links(sdata, 0, 0); return err; } From d7029879bafdac2006c67553807d122283dc6cbf Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Fri, 17 Apr 2026 11:11:44 +0000 Subject: [PATCH 320/554] wifi: b43: 
enforce bounds check on firmware key index in b43_rx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f4f78bf8549e6ac4f04fba4176854f3a6e0c332 upstream. The firmware-controlled key index in b43_rx() can exceed the dev->key[] array size (58 entries). The existing B43_WARN_ON is non-enforcing in production builds, allowing an out-of-bounds read. Make the B43_WARN_ON check enforcing by dropping the frame when the firmware returns an invalid key index. Suggested-by: Jonas Gorski Acked-by: Michael Büsch Fixes: e4d6b7951812 ("[B43]: add mac80211-based driver for modern BCM43xx devices") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Link: https://patch.msgid.link/20260417111145.2694196-1-tristmd@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43/xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c index 7651b1bdb5926..f0b082596637f 100644 --- a/drivers/net/wireless/broadcom/b43/xmit.c +++ b/drivers/net/wireless/broadcom/b43/xmit.c @@ -702,7 +702,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) * key index, but the ucode passed it slightly different. */ keyidx = b43_kidx_to_raw(dev, keyidx); - B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key)); + if (B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key))) + goto drop; if (dev->key[keyidx].algorithm != B43_SEC_ALGO_NONE) { wlhdr_len = ieee80211_hdrlen(fctl); From 658d2e46c2e9a8eb9b80c5e803ce3c89885b3366 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 16 Apr 2026 11:33:39 +0200 Subject: [PATCH 321/554] wifi: brcmfmac: Fix potential use-after-free issue when stopping watchdog task commit c623b63580880cc742255eaed3d79804c1b91143 upstream. Watchdog task might end between send_sig() and kthread_stop() calls, what results in the use-after-free issue. 
Fix this by increasing watchdog task reference count before calling send_sig() and dropping it by switching to kthread_stop_put(). Cc: stable@vger.kernel.org Fixes: 373c83a801f1 ("brcmfmac: stop watchdog before detach and free everything") Fixes: a9ffda88be74 ("brcm80211: fmac: abstract bus_stop interface function pointer") Signed-off-by: Marek Szyprowski Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260416093339.2066829-1-m.szyprowski@samsung.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 4e6ed02c15913..a0e88dbaaebe6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -2476,8 +2476,9 @@ static void brcmf_sdio_bus_stop(struct device *dev) brcmf_dbg(TRACE, "Enter\n"); if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } @@ -4567,8 +4568,9 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) if (bus) { /* Stop watchdog task */ if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } From 8247f52d822180e94ccbfdab91613af386a4e34d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 18:11:03 +0200 Subject: [PATCH 322/554] usb: usblp: fix heap leak in IEEE 1284 device ID via short response commit 7a400c6fe3617e31e690e3f7ca37bb335e0498f3 upstream. usblp_ctrl_msg() collapses the usb_control_msg() return value to 0/-errno, discarding the actual number of bytes transferred. 
A broken printer can complete the GET_DEVICE_ID control transfer short and the driver has no way to know. usblp_cache_device_id_string() reads the 2-byte big-endian length prefix from the response and trusts it (clamped only to the buffer bounds). The buffer is kmalloc(1024) at probe time. A device that sends exactly two bytes (e.g. 0x03 0xFF, claiming a 1023-byte ID) leaves device_id_string[2..1022] holding stale kmalloc heap. That stale data is then exposed: - via the ieee1284_id sysfs attribute (sprintf("%s", buf+2), truncated at the first NUL in the stale heap), and - via the IOCNR_GET_DEVICE_ID ioctl, which copy_to_user()s the full claimed length regardless of NULs, up to 1021 bytes of uninitialized heap, with the leak size chosen by the device. Fix this up by just zapping the buffer with zeros before each request sent to the device. Cc: Pete Zaitcev Assisted-by: gkh_clanker_t1000 Cc: stable Link: https://patch.msgid.link/2026042002-unicorn-greedily-3c63@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index a7a1d38b6bef7..2c47acdc35be4 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1377,6 +1377,7 @@ static int usblp_cache_device_id_string(struct usblp *usblp) { int err, length; + memset(usblp->device_id_string, 0, USBLP_DEVICE_ID_SIZE); err = usblp_get_id(usblp, 0, usblp->device_id_string, USBLP_DEVICE_ID_SIZE - 1); if (err < 0) { dev_dbg(&usblp->intf->dev, From 762a6ccf391db0d629e590a803a3a2231e17dd3f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 18:11:04 +0200 Subject: [PATCH 323/554] usb: usblp: fix uninitialized heap leak via LPGETSTATUS ioctl commit b38e53cbfb9d84732e5984fbd73e128d592415c5 upstream. Just like in a previous problem in this driver, usblp_ctrl_msg() will collapse the usb_control_msg() return value to 0/-errno, discarding the actual number of bytes transferred. 
Ideally that short command should be detected and error out, but many printers are known to send "incorrect" responses back so we can't just do that. statusbuf is kmalloc(8) at probe time and never filled before the first LPGETSTATUS ioctl. usblp_read_status() requests 1 byte. If a malicious printer responds with zero bytes, *statusbuf is one byte of stale kmalloc heap, sign-extended into the local int status, which the LPGETSTATUS path then copy_to_user()s directly to the ioctl caller. Fix this all by just zapping out the memory buffer when allocated at probe time. If a later call does a short read, the data will be identical to what the device sent it the last time, so there is no "leak" of information happening. Cc: Pete Zaitcev Assisted-by: gkh_clanker_t1000 Cc: stable Link: https://patch.msgid.link/2026042011-shredder-savage-48c6@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 2c47acdc35be4..a213b8397d44a 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1178,7 +1178,7 @@ static int usblp_probe(struct usb_interface *intf, } /* Allocate buffer for printer status */ - usblp->statusbuf = kmalloc(STATUS_BUF_SIZE, GFP_KERNEL); + usblp->statusbuf = kzalloc(STATUS_BUF_SIZE, GFP_KERNEL); if (!usblp->statusbuf) { retval = -ENOMEM; goto abort; From 00e095dc5787832f584980ec4ed35f1e06124c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Mon, 4 May 2026 11:08:45 -0300 Subject: [PATCH 324/554] ALSA: usb-audio: midi2: Restart output URBs on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f3c57c9c2a49a21d784b7c04a2c883bffc070659 upstream. USB MIDI 2.0 suspend saves the endpoint running state, clears it and kills all endpoint URBs. Resume restores the running state, but only restarts input endpoints. 
For a running output endpoint, this leaves the endpoint marked running with an empty URB queue. Output transfer progress depends on either the rawmidi trigger path starting the queue or an output completion refilling it. After suspend there is no completion left, and output data that remains queued in the raw UMP or legacy rawmidi buffer can stay stalled until userspace happens to trigger the stream again. Restore the saved state with atomic accessors, keep input endpoints restarted as before, and restart output endpoints that were running before suspend. Clear the saved suspend state after restoring it. Fixes: ff49d1df79ae ("ALSA: usb-audio: USB MIDI 2.0 UMP support") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260504-usb-midi2-output-resume-v1-1-c089cc8ad3c6@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index e6793f3bdfc33..0ca3819fc34b6 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c @@ -227,7 +227,7 @@ static void kill_midi_urbs(struct snd_usb_midi2_endpoint *ep, bool suspending) if (!ep) return; if (suspending) - ep->suspended = ep->running; + atomic_set(&ep->suspended, atomic_read(&ep->running)); atomic_set(&ep->running, 0); for (i = 0; i < ep->num_urbs; i++) { if (!ep->urbs[i].urb) @@ -1190,10 +1190,11 @@ void snd_usb_midi_v2_suspend_all(struct snd_usb_audio *chip) static void resume_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { - ep->running = ep->suspended; - if (ep->direction == STR_IN) + atomic_set(&ep->running, atomic_read(&ep->suspended)); + atomic_set(&ep->suspended, 0); + + if (ep->direction == STR_IN || atomic_read(&ep->running)) submit_io_urbs(ep); - /* FIXME: does it all? 
*/ } void snd_usb_midi_v2_resume_all(struct snd_usb_audio *chip) From be09b47ed8677d76962e3240c145502e2ad9f3c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 17:22:15 +0200 Subject: [PATCH 325/554] ALSA: usb-audio: Avoid potential endless loop in convert_chmap_v3() commit 6e7247d8f5fefeceb0bb9cc80a5388a636b219cd upstream. The convert_chmap_v3() has a loop with its increment size of cs_desc->wLength, but we forgot to validate cs_desc->wLength itself, which may lead to potential endless loop by a malformed descriptor. Add a proper size check to abort the loop for plugging the hole. Fixes: ecfd41166b72 ("ALSA: usb-audio: Validate UAC3 cluster segment descriptors") Cc: Link: https://patch.msgid.link/20260427152224.15276-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/stream.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 3b2526964e4b4..30523da73ec5e 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -352,6 +352,8 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor if (len < sizeof(*cs_desc)) break; cs_len = le16_to_cpu(cs_desc->wLength); + if (cs_len < sizeof(*cs_desc)) + break; if (len < cs_len) break; cs_type = cs_desc->bSegmentType; From 411f2f7274e3e8b58ac16b235102625a32f3a704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Fri, 24 Apr 2026 18:50:10 -0300 Subject: [PATCH 326/554] ALSA: usb-audio: Fix UAC3 cluster descriptor size check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 26265dd69da32d88a88d21987853cec899d9e21f upstream. The UAC3 cluster descriptor length check in snd_usb_get_audioformat_uac3() was added to make sure that the buffer is large enough for a struct uac3_cluster_header_descriptor before the returned data is cast and used. 
However, the check uses sizeof(cluster), where cluster is a pointer, not the size of the descriptor header. This makes the validation depend on the architecture pointer size and does not match the intended object size. Check against sizeof(*cluster) instead. Fixes: fb4e2a6e8f28 ("ALSA: usb-audio: Fix out-of-bounds read in snd_usb_get_audioformat_uac3()") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260424-alsa-usb-uac3-cluster-size-v1-1-99a5808898a3@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 30523da73ec5e..8b83092a1999e 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -999,7 +999,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); - if (wLength < sizeof(cluster)) + if (wLength < sizeof(*cluster)) return NULL; cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) From 52a0cf7cde26ae35ac0b7c8a89bb8143115c46f3 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 13 Apr 2026 21:49:12 +0300 Subject: [PATCH 327/554] USB: omap_udc: DMA: Don't enable burst 4 mode commit 3f91484f6c13c434bd573ca6b6779c26adb0ddab upstream. Commit 65111084c63d7 ("USB: more omap_udc updates (dma and omap1710)") added setting for DMA burst 4 mode. But I think this should be undone for two reasons: - It breaks DMA on 15xx boards - transfers just silently stall. - On newer OMAP1 boards, like Nokia 770 (omap1710), there is no measurable performance impact when testing TCP throughput with g_ether with large 15000 byte MTU size. It's also worth noting that when the original change was made, the OMAP_DMA_DATA_BURST_4 handling in arch/arm/plat-omap/dma.c was broken, and actually resulted in the same as the OMAP_DMA_DATA_BURST_DIS i.e. burst disabled. 
This was not fixed until a couple of kernel releases later in an unrelated commit 1a8bfa1eb998a ("[ARM] 3142/1: OMAP 2/5: Update files common to omap1 and omap2"). So based on this it seems there was never really a very good reason to enable this burst mode in omap_udc, so remove it now to allow 15xx DMA to work again (it provides 2x throughput compared to PIO mode). Fixes: 65111084c63d ("[PATCH] USB: more omap_udc updates (dma and omap1710)") Cc: stable Signed-off-by: Aaro Koskinen Link: https://patch.msgid.link/ad06qHLclWHeSGnV@darkstar.musicnaut.iki.fi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/omap_udc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 062bf2b57d2ea..40f2990ed21b7 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -733,8 +733,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) if (status == 0) { omap_writew(reg, UDC_TXDMA_CFG); /* EMIFF or SDRC */ - omap_set_dma_src_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_src_data_pack(ep->lch, 1); /* TIPB */ omap_set_dma_dest_params(ep->lch, @@ -756,8 +754,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) UDC_DATA_DMA, 0, 0); /* EMIFF or SDRC */ - omap_set_dma_dest_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_dest_data_pack(ep->lch, 1); } } From 6ef872d157f49ae27d01e20b809223c3353de92c Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Mon, 27 Apr 2026 11:17:46 +0200 Subject: [PATCH 328/554] USB: serial: option: add Telit Cinterion LE910Cx compositions commit 100201d349edd226ca3470c894c92dccc67ee7a8 upstream. 
Add the following Telit Cinterion LE910Cx compositions: 0x1251: RNDIS + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=108 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1251 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=02 Prot=ff Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1253: ECM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=121 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1253 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) 
MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1254: tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=122 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1254 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) 
MxPS= 10 Ivl=32ms 0x1255: tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=123 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1255 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) 
MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5f16ea44084fe..8add3a5477f6c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1513,7 +1513,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1251, 0xff) }, /* Telit LE910Cx (RNDIS) */ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1252, 0xff) }, /* Telit LE910Cx (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1253, 0xff) }, /* Telit LE910Cx (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1254, 0xff) }, /* Telit LE910Cx */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1255, 0xff) }, /* Telit LE910Cx */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), From 2a71e01b2cf9b4329ff67102c1bea7448c2a2d2d Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 7 Apr 2026 21:21:22 +0800 Subject: [PATCH 329/554] usb: ulpi: fix memory leak on ulpi_register() error paths commit 0b9fcab1b8608d429e5f239afb197de928d4de7d upstream. Commit 01af542392b5 ("usb: ulpi: fix double free in ulpi_register_interface() error path") removed kfree(ulpi) from ulpi_register_interface() to fix a double-free when device_register() fails. But when ulpi_of_register() or ulpi_read_id() fail before device_register() is called, the ulpi allocation is leaked. Add kfree(ulpi) on both error paths to properly clean up the allocation. 
Fixes: 01af542392b5 ("usb: ulpi: fix double free in ulpi_register_interface() error path") Cc: stable Signed-off-by: Felix Gu Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/20260407-ulpi-v1-1-f3fafe53f7b2@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index d895cf6532a21..95caad156de58 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -286,12 +286,15 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) ACPI_COMPANION_SET(&ulpi->dev, ACPI_COMPANION(dev)); ret = ulpi_of_register(ulpi); - if (ret) + if (ret) { + kfree(ulpi); return ret; + } ret = ulpi_read_id(ulpi); if (ret) { of_node_put(ulpi->dev.of_node); + kfree(ulpi); return ret; } From 0c1c186f8f488bde2d744d8e98b2e02a140acfaf Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 24 Apr 2026 15:40:09 +0800 Subject: [PATCH 330/554] usb: typec: tcpm: fix debug accessory mode detection for sink ports commit f6ec9bb4acc7182b25a793ad094a764e1cb819a7 upstream. The port in debug accessory mode can be either a source or sink. The previous tcpm_port_is_debug() function only checked for source port. Commit 8db73e6a42b6 ("usb: typec: tcpm: allow sink (ufp) to toggle into accessory mode debug") changed the detection logic to support both roles, but left some logic in _tcpm_cc_change() unchanged. This causes the state machine to transition to an incorrect state when operating as a sink in debug accessory mode. 
Log as below: [ 978.637541] CC1: 0 -> 5, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 978.637567] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] [ 978.637596] pending state change SRC_ATTACH_WAIT -> DEBUG_ACC_ATTACHED @ 180 ms [rev1 NONE_AMS] [ 978.647098] CC1: 5 -> 0, CC2: 5 -> 5 [state SRC_ATTACH_WAIT, polarity 0, connected] [ 978.647115] state change SRC_ATTACH_WAIT -> SRC_ATTACH_WAIT [rev1 NONE_AMS] It should go to SNK_ATTACH_WAIT instead of SRC_ATTACH_WAIT state. To fix this, add tcpm_port_is_debug_source() and tcpm_port_is_debug_sink() helper to explicitly identify the power mode in debug accessory mode. Update the state transition logic in _tcpm_cc_change() to ensure the state machine transitions comply with Type-C specification. Also update the logic in run_state_machine() to keep consistency. Fixes: 8db73e6a42b6 ("usb: typec: tcpm: allow sink (ufp) to toggle into accessory mode debug") Cc: stable Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Reviewed-by: Amit Sunil Dhamne Link: https://patch.msgid.link/20260424074009.2979266-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index cc78770509dbc..fe5a04f4cf148 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -634,9 +634,14 @@ static const char * const pd_rev[] = { (tcpm_cc_is_source((port)->cc2) && \ !tcpm_cc_is_source((port)->cc1))) +#define tcpm_port_is_debug_source(port) \ + (tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) + +#define tcpm_port_is_debug_sink(port) \ + (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2)) + #define tcpm_port_is_debug(port) \ - ((tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) || \ - (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2))) + 
(tcpm_port_is_debug_source(port) || tcpm_port_is_debug_sink(port)) #define tcpm_port_is_audio(port) \ (tcpm_cc_is_audio((port)->cc1) && tcpm_cc_is_audio((port)->cc2)) @@ -4799,7 +4804,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SNK_UNATTACHED, PD_T_DRP_SNK); break; case SRC_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_source(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, port->timings.cc_debounce_time); else if (tcpm_port_is_audio(port)) @@ -5057,7 +5062,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SRC_UNATTACHED, PD_T_DRP_SRC); break; case SNK_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -5077,7 +5082,7 @@ static void run_state_machine(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) tcpm_set_state(port, SNK_UNATTACHED, PD_T_PD_DEBOUNCE); - else if (tcpm_port_is_debug(port)) + else if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -5948,10 +5953,10 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, switch (port->state) { case TOGGLING: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) tcpm_set_state(port, SRC_ATTACH_WAIT, 0); - else if (tcpm_port_is_sink(port)) + else if (tcpm_port_is_debug_sink(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case CHECK_CONTAMINANT: @@ -5959,9 +5964,11 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, break; case SRC_UNATTACHED: case ACC_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) 
tcpm_set_state(port, SRC_ATTACH_WAIT, 0); + else if (tcpm_port_is_debug_sink(port)) + tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case SRC_ATTACH_WAIT: if (tcpm_port_is_disconnected(port) || @@ -5983,7 +5990,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, } break; case SNK_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_sink(port) || tcpm_port_is_audio(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; From f8ad9ef7715658e7eebed5f96ea008dbb21e766c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Thu, 23 Apr 2026 10:11:31 -0300 Subject: [PATCH 331/554] ALSA: hda: cs35l56: Propagate ASP TX source control errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0faacc0841d66f3cf51989c10a83f3a82d52ff2c upstream. cs35l56_hda_mixer_get() ignores regmap_read() and cs35l56_hda_mixer_put() ignores regmap_update_bits_check(). This makes the ASP TX source controls report success when a regmap access fails. The write path returns no change instead of an error, and the read path continues after a failed read instead of aborting the control callback. Propagate the regmap errors, matching the posture and volume controls in this driver. 
Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Reviewed-by: Richard Fitzgerald Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260423-alsa-cs35l56-asp-tx-source-errors-v1-1-17ea7c62ec31@gmail.com Signed-off-by: Greg Kroah-Hartman --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index acbacd0766064..82b7352e7ea97 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -180,11 +180,15 @@ static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol, { struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); unsigned int reg_val; - int i; + int i, ret; cs35l56_hda_wait_dsp_ready(cs35l56); - regmap_read(cs35l56->base.regmap, kcontrol->private_value, ®_val); + ret = regmap_read(cs35l56->base.regmap, kcontrol->private_value, + ®_val); + if (ret) + return ret; + reg_val &= CS35L56_ASP_TXn_SRC_MASK; for (i = 0; i < CS35L56_NUM_INPUT_SRC; ++i) { @@ -203,15 +207,20 @@ static int cs35l56_hda_mixer_put(struct snd_kcontrol *kcontrol, struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); unsigned int item = ucontrol->value.enumerated.item[0]; bool changed; + int ret; if (item >= CS35L56_NUM_INPUT_SRC) return -EINVAL; cs35l56_hda_wait_dsp_ready(cs35l56); - regmap_update_bits_check(cs35l56->base.regmap, kcontrol->private_value, - CS35L56_INPUT_MASK, cs35l56_tx_input_values[item], - &changed); + ret = regmap_update_bits_check(cs35l56->base.regmap, + kcontrol->private_value, + CS35L56_INPUT_MASK, + cs35l56_tx_input_values[item], + &changed); + if (ret) + return ret; return changed; } From ac3e9b55b7da6f0be51720bd330a0edc1a8b61f1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Apr 2026 13:21:55 +0200 Subject: [PATCH 332/554] ALSA: pcm: 
oss: Fix data race at accessing runtime.oss.trigger commit 901ac0ff15edf9503162e2cf6579bd11a30f1ed4 upstream. Currently the runtime.oss.trigger field may be accessed concurrently without protection, which may lead to the data race. And, in this case, it may lead to more severe problem because it's a bit field; as writing the data, it may overwrite other bit fields as well, which confuses the operation completely, as spotted by fuzzing. Fix it by covering runtime.oss.trigger bit field also with the existing params_lock mutex in both snd_pcm_oss_get_trigger() and snd_pcm_oss_poll(). Reported-and-tested-by: Jaeyoung Chung Closes: https://lore.kernel.org/20260423145330.210035-1-jjy600901@snu.ac.kr Cc: Link: https://patch.msgid.link/20260424112205.123703-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index b12df5b5ddfc1..9b5a3def8d2ce 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2146,10 +2146,16 @@ static int snd_pcm_oss_get_trigger(struct snd_pcm_oss_file *pcm_oss_file) psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; - if (psubstream && psubstream->runtime && psubstream->runtime->oss.trigger) - result |= PCM_ENABLE_OUTPUT; - if (csubstream && csubstream->runtime && csubstream->runtime->oss.trigger) - result |= PCM_ENABLE_INPUT; + if (psubstream && psubstream->runtime) { + guard(mutex)(&psubstream->runtime->oss.params_lock); + if (psubstream->runtime->oss.trigger) + result |= PCM_ENABLE_OUTPUT; + } + if (csubstream && csubstream->runtime) { + guard(mutex)(&csubstream->runtime->oss.params_lock); + if (csubstream->runtime->oss.trigger) + result |= PCM_ENABLE_INPUT; + } return result; } @@ -2823,6 +2829,17 @@ static int snd_pcm_oss_capture_ready(struct 
snd_pcm_substream *substream) runtime->oss.period_frames; } +static bool need_input_retrigger(struct snd_pcm_runtime *runtime) +{ + bool ret; + + guard(mutex)(&runtime->oss.params_lock); + ret = runtime->oss.trigger; + if (ret) + runtime->oss.trigger = 0; + return ret; +} + static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) { struct snd_pcm_oss_file *pcm_oss_file; @@ -2855,11 +2872,11 @@ static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) snd_pcm_oss_capture_ready(csubstream)) mask |= EPOLLIN | EPOLLRDNORM; } - if (ostate != SNDRV_PCM_STATE_RUNNING && runtime->oss.trigger) { + if (ostate != SNDRV_PCM_STATE_RUNNING && + need_input_retrigger(runtime)) { struct snd_pcm_oss_file ofile; memset(&ofile, 0, sizeof(ofile)); ofile.streams[SNDRV_PCM_STREAM_CAPTURE] = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; - runtime->oss.trigger = 0; snd_pcm_oss_set_trigger(&ofile, PCM_ENABLE_INPUT); } } From d829caccb426c70165f4259fd4540342ff00f744 Mon Sep 17 00:00:00 2001 From: Yuriy Padlyak Date: Thu, 30 Apr 2026 01:09:03 +0300 Subject: [PATCH 333/554] ALSA: hda/realtek: Fix speaker silence after S3 resume on Xiaomi Mi Laptop Pro 15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 92a8b5e2eff6920bf815cd6a80b088ec3fdf01a3 upstream. The Xiaomi Mi Laptop Pro 15 (TM1905, subsystem 1d72:1905) ships with the Realtek ALC256 codec on Intel Comet Lake PCH-LP. After S3 resume the codec sets coefficient register 0x10 to 0x0220 instead of 0x0020 — bit 9 is erroneously set, which silences the internal speaker. Bluetooth and HDMI audio are unaffected because they use different paths. This is the same mechanism fixed for Clevo NJ51CU by commit edca7cc4b0ac ("ALSA: hda/realtek: Fix quirk for Clevo NJ51CU"), but the existing ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME also reconfigures pin 0x19 as a front mic, which is wrong for this Xiaomi where pin 0x19 default is 0x411111f0 (disabled). 
Add a minimal fixup that only clears the stuck coef bit, and add the Xiaomi SSID to the quirk table. Verified by reading coef 0x10 with hda-verb after resume (returns 0x0220), writing 0x0020, and confirming the internal speaker resumes output. With this fixup applied the bit is cleared on every codec init, including post-resume. Signed-off-by: Yuriy Padlyak Cc: Tested-by: Yuriy Padlyak Link: https://patch.msgid.link/20260429220903.14918-1-yuriypadlyak@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/codecs/realtek/alc269.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 622cd353bf25c..844c4650230cd 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3338,6 +3338,19 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, } } +static void alc256_fixup_xiaomi_pro15_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * On the Xiaomi Mi Laptop Pro 15 (TM1905, SSID 1d72:1905) the ALC256 + * codec sets coefficient 0x10 bit 9 to 1 after S3 resume, silencing + * the internal speaker. Bluetooth and HDMI audio are unaffected. + * Clear the bit so the speaker keeps working across suspend cycles. 
+ */ + alc_update_coef_idx(codec, 0x10, 1<<9, 0); +} + static void alc256_decrease_headphone_amp_val(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3925,6 +3938,7 @@ enum { ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, + ALC256_FIXUP_XIAOMI_PRO15_RESUME, ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, ALC287_FIXUP_LEGION_16ACHG6, @@ -6099,6 +6113,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC256_FIXUP_XIAOMI_PRO15_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_xiaomi_pro15_resume, + }, [ALC287_FIXUP_LEGION_16ACHG6] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_legion_16achg6_speakers, @@ -7538,6 +7556,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1905, "Xiaomi Mi Laptop Pro 15", ALC256_FIXUP_XIAOMI_PRO15_RESUME), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1e39, 0xca14, "MEDION NM14LNL", ALC233_FIXUP_MEDION_MTL_SPK), From d761428f6cc7c1a5b27a0551b07df6bf28f47546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Sun, 3 May 2026 21:55:52 -0300 Subject: [PATCH 334/554] ALSA: firewire-tascam: Do not drop unread control events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0749daa8eb5ab90334aaad3b0671efd7150d43b1 upstream. tscm_hwdep_read_queue() copies as many queued control events as fit in the userspace buffer. 
When the buffer is smaller than the current contiguous queue segment, length is rounded down to the number of bytes that can be copied. However, after copying that shortened length, the code advances pull_pos to the original tail_pos, marking the whole contiguous segment as consumed. Any events between the copied portion and tail_pos are lost. Limit tail_pos to the position after the entries actually copied before updating pull_pos. When the whole segment fits, this is equivalent to the old tail_pos update; when the buffer is smaller, the remaining events stay queued for the next read. Fixes: a8c0d13267a4 ("ALSA: firewire-tascam: notify events of change of state for userspace applications") Cc: stable@vger.kernel.org Suggested-by: Takashi Sakamoto Signed-off-by: Cássio Gabriel Reviewed-by: Takashi Sakamoto Co-developed-by: Takashi Sakamoto Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260503-alsa-firewire-tascam-read-queue-v2-1-126c6efd7642@gmail.com Signed-off-by: Greg Kroah-Hartman --- sound/firewire/tascam/tascam-hwdep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c index 867b4ea1096e1..6270263e7bf48 100644 --- a/sound/firewire/tascam/tascam-hwdep.c +++ b/sound/firewire/tascam/tascam-hwdep.c @@ -73,6 +73,7 @@ static long tscm_hwdep_read_queue(struct snd_tscm *tscm, char __user *buf, length = rounddown(remained, sizeof(*entries)); if (length == 0) break; + tail_pos = head_pos + length / sizeof(*entries); spin_unlock_irq(&tscm->lock); if (copy_to_user(pos, &entries[head_pos], length)) From 878c19dd4fbeaece17df54394f359ff3816b52d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Wed, 6 May 2026 00:34:47 -0300 Subject: [PATCH 335/554] ALSA: core: Serialize deferred fasync state checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5337213381df578058e2e41da93cbd0e4639935f 
upstream. snd_fasync_helper() updates fasync->on under snd_fasync_lock, and snd_fasync_work_fn() now also evaluates fasync->on under the same lock. snd_kill_fasync() still tests the flag before taking the lock, leaving an unsynchronized read against FASYNC enable/disable updates. Move the enabled-state check into the locked section. Also clear fasync->on under snd_fasync_lock in snd_fasync_free() before unlinking the pending entry. Together with the locked sender-side check, this publishes teardown before flushing the deferred work and prevents a racing sender from requeueing the entry after free has started. Fixes: ef34a0ae7a26 ("ALSA: core: Add async signal helpers") Fixes: 8146cd333d23 ("ALSA: core: Fix potential data race at fasync handling") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260506-alsa-core-fasync-on-lock-v1-1-ea48c77d6ca4@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/misc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/misc.c b/sound/core/misc.c index 5aca09edf9718..833124c8e4fa8 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c @@ -148,9 +148,11 @@ EXPORT_SYMBOL_GPL(snd_fasync_helper); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) { - if (!fasync || !fasync->on) + if (!fasync) return; guard(spinlock_irqsave)(&snd_fasync_lock); + if (!fasync->on) + return; fasync->signal = signal; fasync->poll = poll; list_move(&fasync->list, &snd_fasync_list); @@ -163,8 +165,10 @@ void snd_fasync_free(struct snd_fasync *fasync) if (!fasync) return; - scoped_guard(spinlock_irq, &snd_fasync_lock) + scoped_guard(spinlock_irq, &snd_fasync_lock) { + fasync->on = 0; list_del_init(&fasync->list); + } flush_work(&snd_fasync_work); kfree(fasync); From 9ef79d48f137facdaf0e0512b77362e724215d08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Wed, 6 May 2026 00:15:48 -0300 Subject: [PATCH 336/554] 
ALSA: seq: Fix UMP group 16 filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 92429ca999db99febced82f23362a71b2ba4c1d8 upstream. The sequencer UAPI defines group_filter as an unsigned int bitmap. Bit 0 filters groupless messages and bits 1-16 filter UMP groups 1-16. The internal snd_seq_client storage is only unsigned short, so bit 16 is truncated when userspace sets the filter. The same truncation affects the automatic UMP client filter used to avoid delivery to inactive groups, so events for group 16 cannot be filtered. Store the internal bitmap as unsigned int and keep both userspace-provided and automatically generated values limited to the defined UAPI bits. Fixes: d2b706077792 ("ALSA: seq: Add UMP group filter") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260506-alsa-seq-ump-group16-filter-v1-1-b75160bf6993@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 2 +- sound/core/seq/seq_clientmgr.h | 5 ++++- sound/core/seq/seq_ump_client.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index f9a6e497f997c..46a1ded7eb068 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1252,7 +1252,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) client->midi_version = client_info->midi_version; memcpy(client->event_filter, client_info->event_filter, 32); - client->group_filter = client_info->group_filter; + client->group_filter = client_info->group_filter & SND_SEQ_GROUP_FILTER_MASK; /* notify the change */ snd_seq_system_client_ev_client_change(client->number); diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index ece02c58db702..feea8bb7d9870 100644 --- a/sound/core/seq/seq_clientmgr.h 
+++ b/sound/core/seq/seq_clientmgr.h @@ -14,6 +14,9 @@ /* client manager */ +#define SND_SEQ_GROUP_FILTER_MASK GENMASK(SNDRV_UMP_MAX_GROUPS, 0) +#define SND_SEQ_GROUP_FILTER_GROUPS GENMASK(SNDRV_UMP_MAX_GROUPS, 1) + struct snd_seq_user_client { struct file *file; /* file struct of client */ /* ... */ @@ -40,7 +43,7 @@ struct snd_seq_client { int number; /* client number */ unsigned int filter; /* filter flags */ DECLARE_BITMAP(event_filter, 256); - unsigned short group_filter; + unsigned int group_filter; snd_use_lock_t use_lock; int event_lost; /* ports */ diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index 27247babb16de..ebbe1cbe0b9b7 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -369,7 +369,7 @@ static void setup_client_group_filter(struct seq_ump_client *client) cptr = snd_seq_kernel_client_get(client->seq_client); if (!cptr) return; - filter = ~(1U << 0); /* always allow groupless messages */ + filter = SND_SEQ_GROUP_FILTER_GROUPS; /* always allow groupless messages */ for (p = 0; p < SNDRV_UMP_MAX_GROUPS; p++) { if (client->ump->groups[p].active) filter &= ~(1U << (p + 1)); From ef49059fce11673b513475d369075003655af7c1 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 7 Apr 2026 18:13:44 +0530 Subject: [PATCH 337/554] powerpc/kdump: fix KASAN sanitization flag for core_$(BITS).o MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b3a97f9484080c6e71db9e803e3cc1bb372a9bc7 upstream. KASAN instrumentation is intended to be disabled for the kexec core code, but the existing Makefile entry misses the object suffix. As a result, the flag is not applied correctly to core_$(BITS).o. So when KASAN is enabled, kexec_copy_flush and copy_segments in kexec/core_64.c are instrumented, which can result in accesses to shadow memory via normal address translation paths. 
Since these run with the MMU disabled, such accesses may trigger page faults (bad_page_fault) that cannot be handled in the kdump path, ultimately causing a hang and preventing the kdump kernel from booting. The same is true for kexec as well, since the same functions are used there. Update the entry to include the “.o” suffix so that KASAN instrumentation is properly disabled for this object file. Fixes: 2ab2d5794f14 ("powerpc/kasan: Disable address sanitization in kexec paths") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/1dee8891-8bcc-46b4-93f3-fc3a774abd5b@linux.ibm.com/ Cc: stable@vger.kernel.org Reviewed-by: Ritesh Harjani (IBM) Tested-by: Venkat Rao Bagalkote Acked-by: Mahesh Salgaonkar Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Signed-off-by: Sourabh Jain Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260407124349.1698552-1-sourabhjain@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kexec/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 470eb0453e17f..ec7a0eed75dc5 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -16,4 +16,4 @@ GCOV_PROFILE_core_$(BITS).o := n KCOV_INSTRUMENT_core_$(BITS).o := n UBSAN_SANITIZE_core_$(BITS).o := n KASAN_SANITIZE_core.o := n -KASAN_SANITIZE_core_$(BITS) := n +KASAN_SANITIZE_core_$(BITS).o := n From c73fdf2e911d5700c8f3b468a1a0a69b95aef5a7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 May 2026 09:16:38 +0200 Subject: [PATCH 338/554] x86/efi: Restore IRQ state in EFI page fault handler commit 2c340aab5485ebe9e33c01437dd4815ef33c8df5 upstream. The kernel's softirq API does not permit re-enabling softirqs while IRQs are disabled. 
The reason for this is that local_bh_enable() will not only re-enable delivery of softirqs over the back of IRQs, it will also handle any pending softirqs immediately, regardless of whether IRQs are enabled at that point. For this reason, commit d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") disables softirqs only when IRQs are enabled, as it is not permitted otherwise, but also unnecessary, given that asynchronous softirq delivery never happens to begin with while IRQs are disabled. However, this does mean that entering a kernel mode FPU section with IRQs enabled and leaving it with IRQs disabled leads to problems, as identified by Sashiko [0]: the EFI page fault handler is called from page_fault_oops() with IRQs disabled, and thus ends the kernel mode FPU section with IRQs disabled as well, regardless of whether IRQs were enabled when it was started. This may result in schedule() being called with a non-zero preempt_count, causing a BUG(). So take care to re-enable IRQs when handling any EFI page faults if they were taken with IRQs enabled. 
[0] https://sashiko.dev/#/patchset/20260430074107.27051-1-ivan.hu%40canonical.com Cc: Eric Biggers Cc: Ivan Hu Cc: x86@kernel.org Cc: Fixes: d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/efi.h | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/platform/efi/quirks.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 51b4cdbea061a..f5932705f4b07 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -137,7 +137,8 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs); extern void efi_unmap_boot_services(void); void arch_efi_call_virt_setup(void); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b83a06739b511..b33a52a3c515d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -686,7 +686,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, * avoid hanging the system. */ if (IS_ENABLED(CONFIG_EFI)) - efi_crash_gracefully_on_page_fault(address); + efi_crash_gracefully_on_page_fault(address, regs); /* Only not-present faults should be handled by KFENCE. */ if (!(error_code & X86_PF_PROT) && diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 79f0818131e83..aa6b26c9cb308 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -761,7 +761,8 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, * @return: Returns, if the page fault is not handled. 
This function * will never return if the page fault is handled successfully. */ -void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_X86_64)) return; @@ -810,6 +811,14 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) return; } + /* + * The API does not permit entering a kernel mode FPU section with + * interrupts enabled and leaving it with interrupts disabled. So + * re-enable interrupts now if they were enabled when the page fault + * occurred. + */ + local_irq_restore(regs->flags); + /* * Before calling EFI Runtime Service, the kernel has switched the * calling process to efi_mm. Hence, switch back to task_mm. From 0fe995ac77a81e6768474f9829d57b143e2f00cb Mon Sep 17 00:00:00 2001 From: Ruijie Li Date: Wed, 29 Apr 2026 00:41:43 +0800 Subject: [PATCH 339/554] xfrm: provide message size for XFRM_MSG_MAPPING commit 28465227c80fe417b4013c432be1f3737cb9f9a3 upstream. The compat 64=>32 translation path handles XFRM_MSG_MAPPING, but xfrm_msg_min[] does not provide the native payload size for this message type. Add the missing XFRM_MSG_MAPPING entry so compat translation can size and translate mapping notifications correctly. 
Fixes: 5461fc0c8d9f ("xfrm/compat: Add 64=>32-bit messages translator") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruijie Li Signed-off-by: Ren Wei Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b3f69c0760d4c..73d23ccdb5e2e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3314,6 +3314,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; From 4980162de555cb838f1a189ce7d2cbf5d2e7b050 Mon Sep 17 00:00:00 2001 From: Michal Kosiorek Date: Wed, 29 Apr 2026 10:54:51 +0200 Subject: [PATCH 340/554] xfrm: defensively unhash xfrm_state lists in __xfrm_state_delete commit 14acf9652e5690de3c7486c6db5fb8dafd0a32a3 upstream. KASAN reproduces a slab-use-after-free in __xfrm_state_delete()'s hlist_del_rcu calls under syzkaller load on linux-6.12.y stable (reproduced on 6.12.47, also reachable via the same code path on torvalds/master and on the ipsec tree). 
Nine unique signatures cluster in the xfrm_state lifecycle, the load-bearing one being: BUG: KASAN: slab-use-after-free in __hlist_del include/linux/list.h:990 [inline] BUG: KASAN: slab-use-after-free in hlist_del_rcu include/linux/rculist.h:516 [inline] BUG: KASAN: slab-use-after-free in __xfrm_state_delete net/xfrm/xfrm_state.c Write of size 8 at addr ffff8881198bcb70 by task kworker/u8:9/435 Workqueue: netns cleanup_net Call Trace: __hlist_del / hlist_del_rcu __xfrm_state_delete xfrm_state_delete xfrm_state_flush xfrm_state_fini ops_exit_list cleanup_net The other observed signatures hit the same slab object from __xfrm_state_lookup, xfrm_alloc_spi, __xfrm_state_insert and an OOB write variant of __xfrm_state_delete, all on the byseq/byspi hash chains. __xfrm_state_delete() guards its byseq and byspi unhashes with value-based predicates: if (x->km.seq) hlist_del_rcu(&x->byseq); if (x->id.spi) hlist_del_rcu(&x->byspi); while everywhere else in the file (e.g. state_cache, state_cache_input) the safer hlist_unhashed() check is used. xfrm_alloc_spi() sets x->id.spi = newspi inside xfrm_state_lock and then immediately inserts into byspi, but a path that observes x->id.spi != 0 outside of xfrm_state_lock can still skip-or-hit the byspi unhash inconsistently with whether x is actually on the list. The same holds for x->km.seq versus byseq, and the bydst/bysrc unhashes have no predicate at all, so a second __xfrm_state_delete() on the same object writes through LIST_POISON pprev. The defensive change here: - Use hlist_del_init_rcu() instead of hlist_del_rcu() on bydst, bysrc, byseq and byspi so a second deletion is a no-op rather than a write through LIST_POISON pprev. The byseq/byspi nodes are already initialised in xfrm_state_alloc(). - Test hlist_unhashed() rather than the value predicate for byseq/byspi, so the unhash decision tracks list state rather than mutable scalar fields. 
Empirical verification: applied this patch on top of v6.12.47, rebuilt, and re-ran the same syzkaller harness for 1h16m on a previously-crashy configuration that produced ~100 hits each of slab-use-after-free Read in xfrm_alloc_spi / Read in __xfrm_state_lookup / Write in __xfrm_state_delete. After the patch, 7.1M execs across 32 VMs at ~1550 exec/sec produced zero xfrm_state UAF/OOB hits. /proc/slabinfo confirms the xfrm_state slab is actively allocated and freed during the run (~143 KiB resident), so the fuzzer is still exercising those code paths -- they just no longer crash. Reproduction: - Linux 6.12.47 x86_64 + KASAN_GENERIC + KASAN_INLINE + KCOV - syzkaller @ 746545b8b1e4c3a128db8652b340d3df90ce61db - 32 QEMU/KVM VMs x 2 vCPU on AWS c5.metal bare metal - 9 unique signatures collected in ~9h, all within xfrm_state lifecycle Fixes: fe9f1d8779cb ("xfrm: add state hashtable keyed by seq") Fixes: 7b4dc3600e48 ("[XFRM]: Do not add a state whose SPI is zero to the SPI hash.") Reported-by: Michal Kosiorek Tested-by: Michal Kosiorek Cc: stable@vger.kernel.org Signed-off-by: Michal Kosiorek Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a00c4fe1ab0ce..f6ba58f18ac18 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del_rcu(&x->bydst); - hlist_del_rcu(&x->bysrc); - if (x->km.seq) - hlist_del_rcu(&x->byseq); + hlist_del_init_rcu(&x->bydst); + hlist_del_init_rcu(&x->bysrc); + if (!hlist_unhashed(&x->byseq)) + hlist_del_init_rcu(&x->byseq); if (!hlist_unhashed(&x->state_cache)) hlist_del_rcu(&x->state_cache); if (!hlist_unhashed(&x->state_cache_input)) hlist_del_rcu(&x->state_cache_input); - if (x->id.spi) - hlist_del_rcu(&x->byspi); + if 
(!hlist_unhashed(&x->byspi)) + hlist_del_init_rcu(&x->byspi); net->xfrm.state_num--; xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); From 9d5047782f9bd2829e529df69209bf3232eb561f Mon Sep 17 00:00:00 2001 From: Yilin Zhu Date: Sun, 12 Apr 2026 13:07:54 +0800 Subject: [PATCH 341/554] ipv6: xfrm6: release dst on error in xfrm6_rcv_encap() commit bc0fcb9823cd0894934cf968b525c575833d7078 upstream. xfrm6_rcv_encap() performs an IPv6 route lookup when the skb does not already have a dst attached. ip6_route_input_lookup() returns a referenced dst entry even when the lookup resolves to an error route. If dst->error is set, xfrm6_rcv_encap() drops the skb without attaching the dst to the skb and without releasing the reference returned by the lookup. Repeated packets hitting this path therefore leak dst entries. Release the dst before jumping to the drop path. Fixes: 0146dca70b87 ("xfrm: add support for UDPv6 encapsulation of ESP") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ruide Cao Signed-off-by: Yilin Zhu Signed-off-by: Ren Wei Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv6/xfrm6_protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805d3b014..9b586fcec4850 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); - if (dst->error) + if (dst->error) { + dst_release(dst); goto drop; + } skb_dst_set(skb, dst); } From 7db99a09b3bc87268287bc7ab5f2e7f382b5ad87 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 18:35:42 -0400 Subject: [PATCH 342/554] xfrm: ah: account for ESN high bits in async callbacks 
commit ec54093e6a8f87e800bb6aa15eb7fc1e33faa524 upstream. AH allocates its temporary auth/ICV layout differently when ESN is enabled: the async ahash setup appends a 4-byte seqhi slot before the ICV or auth_data area, but the async completion callbacks still reconstruct the temporary layout as if seqhi were absent. With an async AH implementation selected, that makes AH copy or compare the wrong bytes on both the IPv4 and IPv6 paths. In UML repro on IPv4 AH with ESN and forced async hmac(sha1), ping fails with 100% packet loss, and the callback logs show the pre-fix drift: ah4 output_done: esn=1 err=0 icv_off=20 expected_off=24 ah4 input_done: esn=1 auth_off=20 expected_auth_off=24 icv_off=32 expected_icv_off=36 Reconstruct the callback-side layout the same way the setup path built it by skipping the ESN seqhi slot before locating the saved auth_data or ICV. Per RFC 4302, the ESN high-order 32 bits participate in the AH ICV computation, so the async callbacks must account for the seqhi slot. Post-fix, the same IPv4 AH+ESN+forced-async-hmac(sha1) UML repro shows the corrected offset (ah4 output_done: esn=1 err=0 icv_off=24 expected_off=24) and ping succeeds; net/ipv4/ah4.o and net/ipv6/ah6.o build clean at W=1. IPv6 AH+ESN was not exercised at runtime, and the change has not been tested against a real async hardware AH engine. 
Fixes: d4d573d0334d ("{IPv4,xfrm} Add ESN support for AH egress part") Fixes: d8b2a8600b0e ("{IPv4,xfrm} Add ESN support for AH ingress part") Fixes: 26dd70c3fad3 ("{IPv6,xfrm} Add ESN support for AH egress part") Fixes: 8d6da6f32557 ("{IPv6,xfrm} Add ESN support for AH ingress part") Cc: stable@vger.kernel.org Assisted-by: Codex:gpt-5-4 Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ah4.c | 14 ++++++++++++-- net/ipv6/ah6.c | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 64aec3dff8ec8..8b0f15abbb38a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err) struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); + int seqhi_len = 0; + __be32 *seqhi; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG : 0; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 95372e0f1d216..bd9ec8000f0b5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err) struct ipv6hdr *top_iph = ipv6_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); int ah_hlen = ipv6_authlen(ah); + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(seqhi, seqhi_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) From e3e722ea88e051ae5361dc540c01ba18f87b5ffd Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Apr 2026 15:29:50 -0400 Subject: [PATCH 343/554] selinux: fix avdcache auditing commit f92d542577db878acfd21cc18dab23d03023b217 upstream. The per-task avdcache was incorrectly saving and reusing the audited vector computed by avc_audit_required() rather than recomputing based on the currently requested permissions and distinguishing the denied versus allowed cases. As a result, some permission checks were not being audited, e.g. 
directory write checks after a previously cached directory search check. Cc: stable@vger.kernel.org Fixes: dde3a5d0f4dce ("selinux: move avdcache to per-task security struct") Signed-off-by: Stephen Smalley [PM: line wrap tweaks] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 31 +++++++++++++------------------ security/selinux/include/objsec.h | 4 +--- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e713291db873c..b2c54c12b6136 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3162,15 +3162,13 @@ static inline int task_avdcache_search(struct task_security_struct *tsec, * @tsec: the task's security state * @isec: the inode associated with the cache entry * @avd: the AVD to cache - * @audited: the permission audit bitmask to cache * - * Update the AVD cache in @tsec with the @avdc and @audited info associated + * Update the AVD cache in @tsec with the @avd info associated * with @isec. 
*/ static inline void task_avdcache_update(struct task_security_struct *tsec, struct inode_security_struct *isec, - struct av_decision *avd, - u32 audited) + struct av_decision *avd) { int spot; @@ -3182,9 +3180,7 @@ static inline void task_avdcache_update(struct task_security_struct *tsec, spot = (tsec->avdcache.dir_spot + 1) & (TSEC_AVDC_DIR_SIZE - 1); tsec->avdcache.dir_spot = spot; tsec->avdcache.dir[spot].isid = isec->sid; - tsec->avdcache.dir[spot].audited = audited; - tsec->avdcache.dir[spot].allowed = avd->allowed; - tsec->avdcache.dir[spot].permissive = avd->flags & AVD_FLAGS_PERMISSIVE; + tsec->avdcache.dir[spot].avd = *avd; tsec->avdcache.permissive_neveraudit = (avd->flags == (AVD_FLAGS_PERMISSIVE|AVD_FLAGS_NEVERAUDIT)); } @@ -3205,6 +3201,7 @@ static int selinux_inode_permission(struct inode *inode, int requested) struct task_security_struct *tsec; struct inode_security_struct *isec; struct avdc_entry *avdc; + struct av_decision avd, *avdp = &avd; int rc, rc2; u32 audited, denied; @@ -3226,23 +3223,21 @@ static int selinux_inode_permission(struct inode *inode, int requested) rc = task_avdcache_search(tsec, isec, &avdc); if (likely(!rc)) { /* Cache hit. */ - audited = perms & avdc->audited; - denied = perms & ~avdc->allowed; - if (unlikely(denied && enforcing_enabled() && - !avdc->permissive)) + avdp = &avdc->avd; + denied = perms & ~avdp->allowed; + if (unlikely(denied) && enforcing_enabled() && + !(avdp->flags & AVD_FLAGS_PERMISSIVE)) rc = -EACCES; } else { - struct av_decision avd; - /* Cache miss. */ rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, - perms, 0, &avd); - audited = avc_audit_required(perms, &avd, rc, - (requested & MAY_ACCESS) ? FILE__AUDIT_ACCESS : 0, - &denied); - task_avdcache_update(tsec, isec, &avd, audited); + perms, 0, avdp); + task_avdcache_update(tsec, isec, avdp); } + audited = avc_audit_required(perms, avdp, rc, + (requested & MAY_ACCESS) ? 
+ FILE__AUDIT_ACCESS : 0, &denied); if (likely(!audited)) return rc; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 8fc3de5234acd..816fde5a5896c 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -32,9 +32,7 @@ struct avdc_entry { u32 isid; /* inode SID */ - u32 allowed; /* allowed permission bitmask */ - u32 audited; /* audited permission bitmask */ - bool permissive; /* AVC permissive flag */ + struct av_decision avd; /* av decision */ }; struct cred_security_struct { From d350fef4bc2467fe1bce15f7a20fe60e01ce41ad Mon Sep 17 00:00:00 2001 From: Zongyao Chen Date: Fri, 24 Apr 2026 15:37:53 +0800 Subject: [PATCH 344/554] selinux: use sk blob accessor in socket permission helpers commit 032e70aff025d7c519af9ab791cd084380619263 upstream. SELinux socket state lives in the composite LSM socket blob. sock_has_perm() and nlmsg_sock_has_extended_perms() currently dereference sk->sk_security directly, which assumes the SELinux socket blob is at offset zero. In stacked configurations that assumption does not hold. If another LSM allocates socket blob storage before SELinux, these helpers may read the wrong blob and feed invalid SID and class values into AVC checks. Use selinux_sock() instead of accessing sk->sk_security directly. 
Fixes: d1d991efaf34 ("selinux: Add netlink xperm support") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Zongyao Chen Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b2c54c12b6136..1135ab8a85fc5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4780,7 +4780,7 @@ static bool sock_skip_has_perm(u32 sid) static int sock_has_perm(struct sock *sk, u32 perms) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net; @@ -6087,7 +6087,7 @@ static unsigned int selinux_ip_postroute(void *priv, static int nlmsg_sock_has_extended_perms(struct sock *sk, u32 perms, u16 nlmsg_type) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; u8 driver; u8 xperm; From 71e68e863a7e18b2b68e5990dbca7d670f0ab5c8 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Sun, 26 Apr 2026 19:23:49 -0400 Subject: [PATCH 345/554] selinux: don't reserve xattr slot when we won't fill it commit 1e5a8eed7821e7a43a31b4c1b3675a91be6bc6f6 upstream. Move lsm_get_xattr_slot() below the SBLABEL_MNT check so we don't leave a NULL-named slot in the array when returning -EOPNOTSUPP; filesystem initxattrs() callbacks stop iterating at the first NULL ->name, silently dropping xattrs installed by later LSMs. 
Cc: stable@vger.kernel.org Signed-off-by: David Windsor Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1135ab8a85fc5..3da3017ad2ca0 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2931,7 +2931,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, { const struct cred_security_struct *crsec = selinux_cred(current_cred()); struct superblock_security_struct *sbsec; - struct xattr *xattr = lsm_get_xattr_slot(xattrs, xattr_count); + struct xattr *xattr; u32 newsid, clen; u16 newsclass; int rc; @@ -2957,6 +2957,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; + xattr = lsm_get_xattr_slot(xattrs, xattr_count); if (xattr) { rc = security_sid_to_context_force(newsid, &context, &clen); From e5c12a115ed5b73e75013f0b00811d5811a3b859 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Apr 2026 14:36:52 -0400 Subject: [PATCH 346/554] selinux: shrink critical section in sel_write_load() commit 868f31e4061eca8c3cd607d79d954d5e54f204aa upstream. Currently sel_write_load() takes the policy mutex earlier than necessary. Move the taking of the mutex later. This avoids holding it unnecessarily across the vmalloc() and copy_from_user() of the policy data. 
Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/selinuxfs.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 232e087bce3ee..1f109b23bae24 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -583,34 +583,31 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (!count) return -EINVAL; - mutex_lock(&selinux_state.policy_mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); if (length) - goto out; + return length; data = vmalloc(count); - if (!data) { - length = -ENOMEM; - goto out; - } + if (!data) + return -ENOMEM; if (copy_from_user(data, buf, count) != 0) { length = -EFAULT; goto out; } + mutex_lock(&selinux_state.policy_mutex); length = security_load_policy(data, count, &load_state); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); - goto out; + goto out_unlock; } fsi = file_inode(file)->i_sb->s_fs_info; length = sel_make_policy_nodes(fsi, load_state.policy); if (length) { pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n"); selinux_policy_cancel(&load_state); - goto out; + goto out_unlock; } selinux_policy_commit(&load_state); @@ -620,8 +617,9 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); -out: +out_unlock: mutex_unlock(&selinux_state.policy_mutex); +out: vfree(data); return length; } From 52a5d9608da54fc9f1907001b1154ea22408fad0 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:48 -0400 Subject: [PATCH 347/554] selinux: prune /sys/fs/selinux/checkreqprot commit 644132a48f4e28a1d949d162160869286f3e75de upstream. 
commit a7e4676e8e2cb ("selinux: remove the 'checkreqprot' functionality") removed the ability to modify the checkreqprot setting but left everything except the updating of the checkreqprot value intact. Aside from unnecessary processing, this could produce a local DoS from log spam and incorrectly calls selinux_ima_measure_state() on each write even though no state has changed. Prune it to just log an error message once and return count (i.e. all bytes written successfully) so that userspace never breaks. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/selinuxfs.c | 47 ++++++------------------------------ 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 1f109b23bae24..ceb0385c840f2 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -676,46 +676,13 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - unsigned int new_value; - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, - NULL); - if (length) - return length; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. */ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%u", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - char comm[sizeof(current->comm)]; - - strscpy(comm, current->comm); - pr_err("SELinux: %s (%d) set checkreqprot to 1. 
This is no longer supported.\n", - comm, current->pid); - } - - selinux_ima_measure_state(); - -out: - kfree(page); - return length; + /* + * Setting checkreqprot is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-checkreqprot + */ + pr_err_once("SELinux: %s (%d) wrote to checkreqprot. This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_checkreqprot_ops = { .read = sel_read_checkreqprot, From 058a5e705424814c91d585fb84c6059e5edbcba5 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:49 -0400 Subject: [PATCH 348/554] selinux: prune /sys/fs/selinux/disable commit 19cfa0099024bb9cd40f6d950caa7f47ff8e77f6 upstream. Commit f22f9aaf6c3d ("selinux: remove the runtime disable functionality") removed the underlying SELinux runtime disable functionality but left everything else intact and started logging an error message to warn any residual users. Prune it to just log an error message once and to return count (i.e. all bytes written successfully) to avoid breaking userspace. This also fixes a local DoS from logspam. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/selinuxfs.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ceb0385c840f2..5e1de5844c899 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -272,35 +272,13 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - int new_value; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. 
*/ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%d", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - pr_err("SELinux: https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable\n"); - pr_err("SELinux: Runtime disable is not supported, use selinux=0 on the kernel cmdline.\n"); - } - -out: - kfree(page); - return length; + /* + * Setting disable is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable + */ + pr_err_once("SELinux: %s (%d) wrote to disable. This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_disable_ops = { From 05b63fbddfca7dd434b952a9e94dc170eb36ea37 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:50 -0400 Subject: [PATCH 349/554] selinux: prune /sys/fs/selinux/user commit ad1ac3d740cc6b858a99ab9c45c8c0574be7d1d3 upstream. Remove the previously deprecated /sys/fs/selinux/user interface aside from a residual stub for userspace compatibility. Commit d7b6918e22c7 ("selinux: Deprecate /sys/fs/selinux/user") started the deprecation process for /sys/fs/selinux/user: The selinuxfs "user" node allows userspace to request a list of security contexts that can be reached for a given SELinux user from a given starting context. This was used by libselinux when various login-style programs requested contexts for users, but libselinux stopped using it in 2020. Kernel support will be removed no sooner than Dec 2025. A pr_warn() message has been in place since Linux v6.13, and a 5 second sleep was introduced since Linux v6.17 to help make it more noticeable. We are now past the stated deadline of Dec 2025, so remove the underlying functionality and replace it with a stub that returns a '0\0' buffer to avoid breaking userspace. 
This also avoids a local DoS from logspam and an uninterruptible sleep delay. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- .../{obsolete => removed}/sysfs-selinux-user | 0 security/selinux/include/security.h | 2 - security/selinux/selinuxfs.c | 68 +--------- security/selinux/ss/services.c | 125 ------------------ 4 files changed, 5 insertions(+), 190 deletions(-) rename Documentation/ABI/{obsolete => removed}/sysfs-selinux-user (100%) diff --git a/Documentation/ABI/obsolete/sysfs-selinux-user b/Documentation/ABI/removed/sysfs-selinux-user similarity index 100% rename from Documentation/ABI/obsolete/sysfs-selinux-user rename to Documentation/ABI/removed/sysfs-selinux-user diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 0f954a40d3fc7..30e3fa962f6fa 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -301,8 +301,6 @@ int security_context_to_sid_default(const char *scontext, u32 scontext_len, int security_context_to_sid_force(const char *scontext, u32 scontext_len, u32 *sid); -int security_get_user_sids(u32 fromsid, const char *username, u32 **sids, u32 *nel); - int security_port_sid(u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 5e1de5844c899..e821519205978 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1005,69 +1005,11 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) static ssize_t sel_write_user(struct file *file, char *buf, size_t size) { - char *con = NULL, *user = NULL, *ptr; - u32 sid, *sids = NULL; - ssize_t length; - char *newcon; - int rc; - u32 i, len, nsids; - - pr_warn_ratelimited("SELinux: %s (%d) wrote to /sys/fs/selinux/user!" 
- " This will not be supported in the future; please update your" - " userspace.\n", current->comm, current->pid); - ssleep(5); - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__COMPUTE_USER, - NULL); - if (length) - goto out; - - length = -ENOMEM; - con = kzalloc(size + 1, GFP_KERNEL); - if (!con) - goto out; - - length = -ENOMEM; - user = kzalloc(size + 1, GFP_KERNEL); - if (!user) - goto out; - - length = -EINVAL; - if (sscanf(buf, "%s %s", con, user) != 2) - goto out; - - length = security_context_str_to_sid(con, &sid, GFP_KERNEL); - if (length) - goto out; - - length = security_get_user_sids(sid, user, &sids, &nsids); - if (length) - goto out; - - length = sprintf(buf, "%u", nsids) + 1; - ptr = buf + length; - for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(sids[i], &newcon, &len); - if (rc) { - length = rc; - goto out; - } - if ((length + len) >= SIMPLE_TRANSACTION_LIMIT) { - kfree(newcon); - length = -ERANGE; - goto out; - } - memcpy(ptr, newcon, len); - kfree(newcon); - ptr += len; - length += len; - } -out: - kfree(sids); - kfree(user); - kfree(con); - return length; + pr_err_once("SELinux: %s (%d) wrote to user. This is no longer supported.\n", + current->comm, current->pid); + buf[0] = '0'; + buf[1] = 0; + return 2; } static ssize_t sel_write_member(struct file *file, char *buf, size_t size) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 713130bd43c47..1f5b05a59f849 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2746,131 +2746,6 @@ int security_node_sid(u16 domain, return rc; } -#define SIDS_NEL 25 - -/** - * security_get_user_sids - Obtain reachable SIDs for a user. - * @fromsid: starting SID - * @username: username - * @sids: array of reachable SIDs for user - * @nel: number of elements in @sids - * - * Generate the set of SIDs for legal security contexts - * for a given user that can be reached by @fromsid. 
- * Set *@sids to point to a dynamically allocated - * array containing the set of SIDs. Set *@nel to the - * number of elements in the array. - */ - -int security_get_user_sids(u32 fromsid, - const char *username, - u32 **sids, - u32 *nel) -{ - struct selinux_policy *policy; - struct policydb *policydb; - struct sidtab *sidtab; - struct context *fromcon, usercon; - u32 *mysids = NULL, *mysids2, sid; - u32 i, j, mynel, maxnel = SIDS_NEL; - struct user_datum *user; - struct role_datum *role; - struct ebitmap_node *rnode, *tnode; - int rc; - - *sids = NULL; - *nel = 0; - - if (!selinux_initialized()) - return 0; - - mysids = kcalloc(maxnel, sizeof(*mysids), GFP_KERNEL); - if (!mysids) - return -ENOMEM; - -retry: - mynel = 0; - rcu_read_lock(); - policy = rcu_dereference(selinux_state.policy); - policydb = &policy->policydb; - sidtab = policy->sidtab; - - context_init(&usercon); - - rc = -EINVAL; - fromcon = sidtab_search(sidtab, fromsid); - if (!fromcon) - goto out_unlock; - - rc = -EINVAL; - user = symtab_search(&policydb->p_users, username); - if (!user) - goto out_unlock; - - usercon.user = user->value; - - ebitmap_for_each_positive_bit(&user->roles, rnode, i) { - role = policydb->role_val_to_struct[i]; - usercon.role = i + 1; - ebitmap_for_each_positive_bit(&role->types, tnode, j) { - usercon.type = j + 1; - - if (mls_setup_user_range(policydb, fromcon, user, - &usercon)) - continue; - - rc = sidtab_context_to_sid(sidtab, &usercon, &sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out_unlock; - if (mynel < maxnel) { - mysids[mynel++] = sid; - } else { - rc = -ENOMEM; - maxnel += SIDS_NEL; - mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC); - if (!mysids2) - goto out_unlock; - memcpy(mysids2, mysids, mynel * sizeof(*mysids2)); - kfree(mysids); - mysids = mysids2; - mysids[mynel++] = sid; - } - } - } - rc = 0; -out_unlock: - rcu_read_unlock(); - if (rc || !mynel) { - kfree(mysids); - return rc; - } - - rc = -ENOMEM; - 
mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL); - if (!mysids2) { - kfree(mysids); - return rc; - } - for (i = 0, j = 0; i < mynel; i++) { - struct av_decision dummy_avd; - rc = avc_has_perm_noaudit(fromsid, mysids[i], - SECCLASS_PROCESS, /* kernel value */ - PROCESS__TRANSITION, AVC_STRICT, - &dummy_avd); - if (!rc) - mysids2[j++] = mysids[i]; - cond_resched(); - } - kfree(mysids); - *sids = mysids2; - *nel = j; - return 0; -} - /** * __security_genfs_sid - Helper to obtain a SID for a file in a filesystem * @policy: policy From 53ad20efd20a2cf0be4c03c4083ece17b2bae910 Mon Sep 17 00:00:00 2001 From: Tao Cui Date: Mon, 4 May 2026 09:00:38 +0800 Subject: [PATCH 350/554] LoongArch: KVM: Fix missing EMULATE_FAIL in kvm_emu_mmio_read() commit f26faae96c411a70641e4d21b759475caa6122d5 upstream. In the ldptr (0x24...0x27) opcode decoding path, the default case only breaks out without setting "ret" value to EMULATE_FAIL. This leaves run->mmio.len uninitialized (stale from a previous MMIO operation) while "ret" value remains EMULATE_DO_MMIO, causing the code to proceed with an incorrect MMIO length. Add "ret = EMULATE_FAIL" to match the other default branches in the same function (e.g. the 0x28...0x2e and 0x38 cases). 
Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Tao Cui Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/exit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index cb493980d874a..7c3258476432b 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -390,6 +390,7 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) run->mmio.len = 8; break; default: + ret = EMULATE_FAIL; break; } break; From b40cdd1b1370d76e9e760af4490cb4a351cceead Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 13:08:44 -0400 Subject: [PATCH 351/554] Bluetooth: virtio_bt: clamp rx length before skb_put commit 21bd244b6de5d2fe1063c23acc93fbdd2b20d112 upstream. virtbt_rx_work() calls skb_put(skb, len) where len comes directly from virtqueue_get_buf() with no validation against the buffer we posted to the device. The RX skb is allocated in virtbt_add_inbuf() and exposed to virtio as exactly 1000 bytes via sg_init_one(). Checking len against skb_tailroom(skb) is not sufficient because alloc_skb() can leave more tailroom than the 1000 bytes actually handed to the device. A malicious or buggy backend can therefore report used.len between 1001 and skb_tailroom(skb), causing skb_put() to include uninitialized kernel heap bytes that were never written by the device. The same path also accepts len == 0, in which case skb_put(skb, 0) leaves the skb empty but virtbt_rx_handle() still reads the pkt_type byte from skb->data, consuming uninitialized memory. Define VIRTBT_RX_BUF_SIZE once and reuse it in alloc_skb() and sg_init_one(), and gate virtbt_rx_work() on that same constant so the bound checked matches the buffer actually exposed to the device. Reject used.len == 0 in the same gate so an empty completion can no longer reach virtbt_rx_handle(). 
Use bt_dev_err_ratelimited() because the length value comes from an untrusted backend that can otherwise flood the kernel log. Same class of bug as commit c04db81cd028 ("net/9p: Fix buffer overflow in USB transport layer"), which hardened the USB 9p transport against unchecked device-reported length. Fixes: 160fbcf3bfb9 ("Bluetooth: virtio_bt: Use skb_put to set length") Cc: stable@vger.kernel.org Cc: Soenke Huster Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/virtio_bt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 6f1a37e85c6a4..3a0f6715cd994 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -12,6 +12,7 @@ #include #define VERSION "0.1" +#define VIRTBT_RX_BUF_SIZE 1000 enum { VIRTBT_VQ_TX, @@ -33,11 +34,11 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt) struct sk_buff *skb; int err; - skb = alloc_skb(1000, GFP_KERNEL); + skb = alloc_skb(VIRTBT_RX_BUF_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - sg_init_one(sg, skb->data, 1000); + sg_init_one(sg, skb->data, VIRTBT_RX_BUF_SIZE); err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); if (err < 0) { @@ -227,8 +228,15 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb_put(skb, len); - virtbt_rx_handle(vbt, skb); + if (!len || len > VIRTBT_RX_BUF_SIZE) { + bt_dev_err_ratelimited(vbt->hdev, + "rx reply len %u outside [1, %u]\n", + len, VIRTBT_RX_BUF_SIZE); + kfree_skb(skb); + } else { + skb_put(skb, len); + virtbt_rx_handle(vbt, skb); + } if (virtbt_add_inbuf(vbt) < 0) return; From 3485c7236c59c8c34a41af1c4b52982437554e79 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 13:08:45 -0400 Subject: [PATCH 352/554] Bluetooth: virtio_bt: validate rx pkt_type header length commit 
daf23014e5d975e72ea9c02b5160d3fcf070ea47 upstream. virtbt_rx_handle() reads the leading pkt_type byte from the RX skb and forwards the remainder to hci_recv_frame() for every event/ACL/SCO/ISO type, without checking that the remaining payload is at least the fixed HCI header for that type. After the preceding patch bounds the backend-supplied used.len to [1, VIRTBT_RX_BUF_SIZE], a one-byte completion still reaches hci_recv_frame() with skb->len already pulled to 0. If the byte happened to be HCI_ACLDATA_PKT, the ACL-vs-ISO classification fast-path in hci_dev_classify_pkt_type() dereferences hci_acl_hdr(skb)->handle whenever the HCI device has an active CIS_LINK, BIS_LINK, or PA_LINK connection, reading two bytes of uninitialized RX-buffer data. The same hazard exists for every packet type the driver accepts because none of the switch cases in virtbt_rx_handle() check skb->len against the per-type minimum HCI header size before handing the frame to the core. After stripping pkt_type, require skb->len to cover the fixed header size for the selected type (event 2, ACL 4, SCO 3, ISO 4) before calling hci_recv_frame(); drop ratelimited otherwise. Unknown pkt_type values still take the original kfree_skb() default path. Use bt_dev_err_ratelimited() because both the length and pkt_type values come from an untrusted backend that can otherwise flood the kernel log. 
Fixes: 160fbcf3bfb9 ("Bluetooth: virtio_bt: Use skb_put to set length") Cc: stable@vger.kernel.org Cc: Soenke Huster Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/virtio_bt.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 3a0f6715cd994..9341c5eed5b53 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -198,6 +198,7 @@ static int virtbt_shutdown_generic(struct hci_dev *hdev) static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) { + size_t min_hdr; __u8 pkt_type; pkt_type = *((__u8 *) skb->data); @@ -205,16 +206,32 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) switch (pkt_type) { case HCI_EVENT_PKT: + min_hdr = sizeof(struct hci_event_hdr); + break; case HCI_ACLDATA_PKT: + min_hdr = sizeof(struct hci_acl_hdr); + break; case HCI_SCODATA_PKT: + min_hdr = sizeof(struct hci_sco_hdr); + break; case HCI_ISODATA_PKT: - hci_skb_pkt_type(skb) = pkt_type; - hci_recv_frame(vbt->hdev, skb); + min_hdr = sizeof(struct hci_iso_hdr); break; default: kfree_skb(skb); - break; + return; } + + if (skb->len < min_hdr) { + bt_dev_err_ratelimited(vbt->hdev, + "rx pkt_type 0x%02x payload %u < hdr %zu\n", + pkt_type, skb->len, min_hdr); + kfree_skb(skb); + return; + } + + hci_skb_pkt_type(skb) = pkt_type; + hci_recv_frame(vbt->hdev, skb); } static void virtbt_rx_work(struct work_struct *work) From 624fb79dadc1b65757986a9d0fdde5c0cf3fe179 Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Tue, 21 Apr 2026 11:14:54 +0000 Subject: [PATCH 353/554] Bluetooth: btmtk: validate WMT event SKB length before struct access commit 634a4408c0615c523cf7531790f4f14a422b9206 upstream. 
btmtk_usb_hci_wmt_sync() casts the WMT event response SKB data to struct btmtk_hci_wmt_evt (7 bytes) and struct btmtk_hci_wmt_evt_funcc (9 bytes) without first checking that the SKB contains enough data. A short firmware response causes out-of-bounds reads from SKB tailroom. Use skb_pull_data() to validate and advance past the base WMT event header. For the FUNC_CTRL case, pull the additional status field bytes before accessing them. Fixes: d019930b0049 ("Bluetooth: btmtk: move btusb_mtk_hci_wmt_sync to btmtk.c") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btmtk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index a8c520dc09e19..7b4aa8e7a495b 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -654,8 +654,13 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, if (data->evt_skb == NULL) goto err_free_wc; - /* Parse and handle the return WMT event */ - wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; + wmt_evt = skb_pull_data(data->evt_skb, sizeof(*wmt_evt)); + if (!wmt_evt) { + bt_dev_err(hdev, "WMT event too short (%u bytes)", + data->evt_skb->len); + err = -EINVAL; + goto err_free_skb; + } if (wmt_evt->whdr.op != hdr->op) { bt_dev_err(hdev, "Wrong op received %d expected %d", wmt_evt->whdr.op, hdr->op); @@ -671,6 +676,12 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, status = BTMTK_WMT_PATCH_DONE; break; case BTMTK_WMT_FUNC_CTRL: + if (!skb_pull_data(data->evt_skb, + sizeof(wmt_evt_funcc->status))) { + err = -EINVAL; + goto err_free_skb; + } + wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; if (be16_to_cpu(wmt_evt_funcc->status) == 0x404) status = BTMTK_WMT_ON_DONE; From 77981a507aa0fc001dc37f0dd6631dd2042fed17 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 10 Apr 2026 15:29:52 -0400 
Subject: [PATCH 354/554] Bluetooth: hci_event: Fix OOB read and infinite loop in hci_le_create_big_complete_evt commit 5ddb8014261137cadaf83ab5617a588d80a22586 upstream. hci_le_create_big_complete_evt() iterates over BT_BOUND connections for a BIG handle using a while loop, accessing ev->bis_handle[i++] on each iteration. However, there is no check that i stays within ev->num_bis before the array access. When a controller sends a LE_Create_BIG_Complete event with fewer bis_handle entries than there are BT_BOUND connections for that BIG, or with num_bis=0, the loop reads beyond the valid bis_handle[] flex array into adjacent heap memory. Since the out-of-bounds values typically exceed HCI_CONN_HANDLE_MAX (0x0EFF), hci_conn_set_handle() rejects them and the connection remains in BT_BOUND state. The same connection is then found again by hci_conn_hash_lookup_big_state(), creating an infinite loop with hci_dev_lock held. Fix this by terminating the BIG in case not all BIS could be set up properly. 
Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") Cc: stable@vger.kernel.org Signed-off-by: ZhiTao Ou Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c66443f4cf5a7..6f77ab2629d65 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6973,9 +6973,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, continue; } + if (ev->num_bis <= i) { + bt_dev_err(hdev, + "Not enough BIS handles for BIG 0x%2.2x", + ev->handle); + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + continue; + } + if (hci_conn_set_handle(conn, - __le16_to_cpu(ev->bis_handle[i++]))) + __le16_to_cpu(ev->bis_handle[i++]))) { + bt_dev_err(hdev, + "Failed to set BIS handle for BIG 0x%2.2x", + ev->handle); + /* Force error so BIG gets terminated as not all BIS + * could be connected. + */ + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); continue; + } conn->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_CREATED, &conn->flags); @@ -6984,7 +7004,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, hci_iso_setup_path(conn); } - if (!ev->status && !i) + /* If there is an unexpected error or if no BISes have been connected + * for the BIG, terminate it. + */ + if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i)) /* If no BISes have been connected for the BIG, * terminate. 
This is in case all bound connections * have been closed before the BIG creation From 741e6024e31587b0c021b6616a9e428a4ea0b64a Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:49:59 -0400 Subject: [PATCH 355/554] Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_new_connection_cb() commit 0a120d96166301d7a95be75b52f843837dbd1219 upstream. Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). Fixes: 80808e431e1e ("Bluetooth: Add l2cap_chan_ops abstraction") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 80a37d56b040f..9de1e3ca93943 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1480,6 +1480,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; + if (!parent) + return NULL; + lock_sock(parent); /* Check for backlog size */ From 1810e42ff6716f320c7269d5850eca48b07b7427 Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:51:36 -0400 Subject: [PATCH 356/554] Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_state_change_cb() commit 2ff1a41a912de8517b4482e946dd951b7d80edbf upstream. Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). 
Fixes: 89bc500e41fc ("Bluetooth: Add state tracking to struct l2cap_chan") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 9de1e3ca93943..d1017d7f5ca78 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1643,6 +1643,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, { struct sock *sk = chan->data; + if (!sk) + return; + sk->sk_state = state; if (err) From 32bd343803d4ba47cc516f9d5f037f01b855d767 Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:53:36 -0400 Subject: [PATCH 357/554] Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_get_sndtimeo_cb() commit 78a88d43dab8d23aeef934ed8ce34d40e6b3d613 upstream. Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). Fixes: 8d836d71e222 ("Bluetooth: Access sk_sndtimeo indirectly in l2cap_core.c") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d1017d7f5ca78..15637402a39de 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1747,6 +1747,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return 0; + return READ_ONCE(sk->sk_sndtimeo); } From 4ea3b2e469fc60a76b7bdd1fb25d3703dbf2b161 Mon Sep 17 00:00:00 2001 From: Eliot Courtney Date: Thu, 23 Apr 2026 21:36:52 +0900 Subject: [PATCH 358/554] rust: drm: gem: clean up GEM state in init failure case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e42a17b8f6bc3c0cd69d7556b588011d3ec2394 upstream. 
Currently, if `drm_gem_object_init` fails, the object is freed without any cleanup. Perform the cleanup in that case. Cc: stable@vger.kernel.org Fixes: c284d3e42338 ("rust: drm: gem: Add GEM object abstraction") Signed-off-by: Eliot Courtney Reviewed-by: Alice Ryhl Reviewed-by: Onur Özkan Link: https://patch.msgid.link/20260423-fix-gem-1-v1-1-e12e35f7bba9@nvidia.com [ Move safety comment closer to unsafe block to avoid a clippy warning. - Danilo ] Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- rust/kernel/drm/gem/mod.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 73b5f0bf3f85d..bbe085fd17426 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -232,8 +232,17 @@ impl Object { // SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above. unsafe { (*obj.as_raw()).funcs = &Self::OBJECT_FUNCS }; - // SAFETY: The arguments are all valid per the type invariants. - to_result(unsafe { bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) })?; + if let Err(err) = + // SAFETY: The arguments are all valid per the type invariants. + to_result(unsafe { + bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) + }) + { + // SAFETY: `drm_gem_object_init()` initializes the private GEM object state before + // failing, so `drm_gem_private_object_fini()` is the matching cleanup. + unsafe { bindings::drm_gem_private_object_fini(obj.obj.get()) }; + return Err(err); + } // SAFETY: We never move out of `Self`. let ptr = KBox::into_raw(unsafe { Pin::into_inner_unchecked(obj) }); From 16a7e845c3a0e5130f1d51d1f92e611d6cfd26f1 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 26 Apr 2026 16:42:00 +0200 Subject: [PATCH 359/554] rust: allow `clippy::collapsible_match` globally commit 838d852da8503372f3a1779bfbd1ccb93153ab4e upstream. 
The `clippy::collapsible_match` lint [1] can make code harder to read in certain cases [2], e.g. CLIPPY P rust/libmacros.so - due to command line change warning: this `if` can be collapsed into the outer `match` --> rust/pin-init/internal/src/helpers.rs:91:17 | 91 | / if nesting == 1 { 92 | | impl_generics.push(tt.clone()); 93 | | impl_generics.push(tt); 94 | | skip_until_comma = false; 95 | | } | |_________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_match = note: `-W clippy::collapsible-match` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::collapsible_match)]` help: collapse nested if block | 90 ~ TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' 91 ~ && nesting == 1 => { 92 | impl_generics.push(tt.clone()); 93 | impl_generics.push(tt); 94 | skip_until_comma = false; 95 ~ } | The lint does not have much upside -- when the suggestion may be a good one, it would still read fine when nested anyway. And it is the kind of lint that may easily bias people to just apply the suggestion instead of allowing it. [ In addition, as Gary points out [3], the suggestion is also wrong [4] and in the process of being fixed [5], possibly for Rust 1.97.0: Link: https://lore.kernel.org/rust-for-linux/DI3YV94TH9I3.1SOHW51552497@garyguo.net/ [3] Link: https://github.com/rust-lang/rust-clippy/issues/16875 [4] Link: https://github.com/rust-lang/rust-clippy/pull/16878 [5] - Miguel ] Thus just let developers decide on their own. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
Link: https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_match [1] Link: https://lore.kernel.org/rust-for-linux/CANiq72nWYJna_hdFxjQCQZK6yJBrr1Mb86iKavivV0U0BgufeA@mail.gmail.com/ [2] Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260426144201.227108-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index ca41f54bea2ee..274ea06fecd50 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ -Wclippy::cast_lossless \ + -Aclippy::collapsible_match \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ From ca488ac3b08330d7e980ced0d16c37858b52ac1c Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 26 Apr 2026 16:42:01 +0200 Subject: [PATCH 360/554] rust: allow `clippy::collapsible_if` globally commit 2adc8664018c1cc595c7c0c98474a33c7fe32a85 upstream. Similar to `clippy::collapsible_match` (globally allowed in the previous commit), the `clippy::collapsible_if` lint [1] can make code harder to read in certain cases. Thus just let developers decide on their own. In addition, remove the existing `expect` we had. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
Suggested-by: Gary Guo Link: https://lore.kernel.org/rust-for-linux/DGROP5CHU1QZ.1OKJRAUZXE9WC@garyguo.net/ Link: https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if [1] Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260426144201.227108-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + drivers/android/binder/range_alloc/array.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 274ea06fecd50..65e97b7e2aa84 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ -Wclippy::cast_lossless \ + -Aclippy::collapsible_if \ -Aclippy::collapsible_match \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs index ada1d1b4302e5..081d19b09d4bb 100644 --- a/drivers/android/binder/range_alloc/array.rs +++ b/drivers/android/binder/range_alloc/array.rs @@ -204,7 +204,6 @@ impl ArrayRangeAllocator { // caller will mark them as unused, which means that they can be freed if the system comes // under memory pressure. let mut freed_range = FreedRange::interior_pages(offset, size); - #[expect(clippy::collapsible_if)] // reads better like this if offset % PAGE_SIZE != 0 { if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { freed_range.start_page_idx -= 1; From 40db1bd1ffc6081fb4738f520bfd9dce7b2c2667 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 10:17:50 +0200 Subject: [PATCH 361/554] spi: synquacer: fix controller deregistration commit 75d849c3452e9611de031db45b3149ba9a99035f upstream. Make sure to deregister the controller before disabling underlying resources like clocks during driver unbind. 
Fixes: b0823ee35cf9 ("spi: Add spi driver for Socionext SynQuacer platform") Cc: stable@vger.kernel.org # 5.3 Cc: Masahisa Kojima Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410081757.503099-21-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-synquacer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c index eaf560487591d..e8538816da643 100644 --- a/drivers/spi/spi-synquacer.c +++ b/drivers/spi/spi-synquacer.c @@ -719,7 +719,7 @@ static int synquacer_spi_probe(struct platform_device *pdev) pm_runtime_set_active(sspi->dev); pm_runtime_enable(sspi->dev); - ret = devm_spi_register_controller(sspi->dev, host); + ret = spi_register_controller(host); if (ret) goto disable_pm; @@ -740,9 +740,15 @@ static void synquacer_spi_remove(struct platform_device *pdev) struct spi_controller *host = platform_get_drvdata(pdev); struct synquacer_spi *sspi = spi_controller_get_devdata(host); + spi_controller_get(host); + + spi_unregister_controller(host); + pm_runtime_disable(sspi->dev); clk_disable_unprepare(sspi->clk); + + spi_controller_put(host); } static int __maybe_unused synquacer_spi_suspend(struct device *dev) From 9aea3d4f23ca3995081357511fc68fbc44a6bc7c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 10:17:48 +0200 Subject: [PATCH 362/554] spi: sun4i: fix controller deregistration commit 42108a2f03e0fdeabe9d02d085bdb058baa1189f upstream. Make sure to deregister the controller before disabling underlying resources like clocks during driver unbind. 
Fixes: b5f6517948cc ("spi: sunxi: Add Allwinner A10 SPI controller driver") Cc: stable@vger.kernel.org # 3.15 Cc: Maxime Ripard Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410081757.503099-19-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sun4i.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index aa92fd5a35a98..280b0fb909e98 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -505,7 +505,7 @@ static int sun4i_spi_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_idle(&pdev->dev); - ret = devm_spi_register_controller(&pdev->dev, host); + ret = spi_register_controller(host); if (ret) { dev_err(&pdev->dev, "cannot register SPI host\n"); goto err_pm_disable; @@ -523,7 +523,15 @@ static int sun4i_spi_probe(struct platform_device *pdev) static void sun4i_spi_remove(struct platform_device *pdev) { + struct spi_controller *host = platform_get_drvdata(pdev); + + spi_controller_get(host); + + spi_unregister_controller(host); + pm_runtime_force_suspend(&pdev->dev); + + spi_controller_put(host); } static const struct of_device_id sun4i_spi_match[] = { From 9df7d2530412e33f78ffa101dd6ecc4086db054d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 10:17:53 +0200 Subject: [PATCH 363/554] spi: ti-qspi: fix controller deregistration commit 0c18a1bacbb1d8b8aa34d3d004a2cb8226c8b1ea upstream. Make sure to deregister the controller before disabling underlying resources like clocks during driver unbind. Note that the controller is suspended before disabling and releasing resources since commit 3ac066e2227c ("spi: spi-ti-qspi: Suspend the queue before removing the device") which avoids issues like unclocked accesses but prevents SPI device drivers from doing I/O during deregistration. 
Fixes: 3b3a80019ff1 ("spi: ti-qspi: one only one interrupt handler") Cc: stable@vger.kernel.org # 3.13 Cc: Sebastian Andrzej Siewior Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410081757.503099-24-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ti-qspi.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 0b7eaccbc797e..ba54d222d0e0b 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -889,7 +889,7 @@ static int ti_qspi_probe(struct platform_device *pdev) qspi->mmap_enabled = false; qspi->current_cs = -1; - ret = devm_spi_register_controller(&pdev->dev, host); + ret = spi_register_controller(host); if (!ret) return 0; @@ -904,19 +904,17 @@ static int ti_qspi_probe(struct platform_device *pdev) static void ti_qspi_remove(struct platform_device *pdev) { struct ti_qspi *qspi = platform_get_drvdata(pdev); - int rc; - rc = spi_controller_suspend(qspi->host); - if (rc) { - dev_alert(&pdev->dev, "spi_controller_suspend() failed (%pe)\n", - ERR_PTR(rc)); - return; - } + spi_controller_get(qspi->host); + + spi_unregister_controller(qspi->host); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); ti_qspi_dma_cleanup(qspi); + + spi_controller_put(qspi->host); } static const struct dev_pm_ops ti_qspi_pm_ops = { From 93f83b3a50efa249c655796fb70f64325aa00d08 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 10:17:49 +0200 Subject: [PATCH 364/554] spi: sun6i: fix controller deregistration commit d874a1c33aee0d88fb4ba2f8aeadaa9f1965209a upstream. Make sure to deregister the controller before disabling underlying resources like clocks during driver unbind. 
Fixes: 3558fe900e8a ("spi: sunxi: Add Allwinner A31 SPI controller driver") Cc: stable@vger.kernel.org # 3.15 Cc: Maxime Ripard Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410081757.503099-20-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sun6i.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c index 871dfd3e77be2..1d658068cd8c9 100644 --- a/drivers/spi/spi-sun6i.c +++ b/drivers/spi/spi-sun6i.c @@ -743,7 +743,7 @@ static int sun6i_spi_probe(struct platform_device *pdev) pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = devm_spi_register_controller(&pdev->dev, host); + ret = spi_register_controller(host); if (ret) { dev_err(&pdev->dev, "cannot register SPI host\n"); goto err_pm_disable; @@ -769,12 +769,18 @@ static void sun6i_spi_remove(struct platform_device *pdev) { struct spi_controller *host = platform_get_drvdata(pdev); + spi_controller_get(host); + + spi_unregister_controller(host); + pm_runtime_force_suspend(&pdev->dev); if (host->dma_tx) dma_release_channel(host->dma_tx); if (host->dma_rx) dma_release_channel(host->dma_rx); + + spi_controller_put(host); } static const struct sun6i_spi_cfg sun6i_a31_spi_cfg = { From 4f0dd529990d6668aae19d39bd6fae40cab962ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 10:17:55 +0200 Subject: [PATCH 365/554] spi: zynqmp-gqspi: fix controller deregistration commit 6895fc4faafc9082e15e4e624b23dd5f0c98feb5 upstream. Make sure to deregister the controller before disabling underlying resources like clocks during driver unbind. 
Fixes: dfe11a11d523 ("spi: Add support for Zynq Ultrascale+ MPSoC GQSPI controller") Cc: stable@vger.kernel.org # 4.2: 64640f6c972e Cc: stable@vger.kernel.org # 4.2 Cc: Ranjit Waghmode Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410081757.503099-26-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-zynqmp-gqspi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 502fd5eccc834..f9a1427dabade 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1324,7 +1324,7 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) ctlr->dev.of_node = np; ctlr->auto_runtime_pm = true; - ret = devm_spi_register_controller(&pdev->dev, ctlr); + ret = spi_register_controller(ctlr); if (ret) { dev_err(&pdev->dev, "spi_register_controller failed\n"); goto clk_dis_all; @@ -1362,6 +1362,8 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); + spi_unregister_controller(xqspi->ctlr); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); pm_runtime_disable(&pdev->dev); From 1b66f16a571a10ba8889ac471755c8af9c5b9266 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Apr 2026 11:49:25 +0200 Subject: [PATCH 366/554] spi: s3c64xx: fix NULL-deref on driver unbind commit 45daacbead8a009844bd5dba6cfa731332184d17 upstream. A change moving DMA channel allocation from probe() back to s3c64xx_spi_prepare_transfer() failed to remove the corresponding deallocation from remove(). Drop the bogus DMA channel release from remove() to avoid triggering a NULL-pointer dereference on driver unbind. This issue was flagged by Sashiko when reviewing a controller deregistration fix. 
Fixes: f52b03c70744 ("spi: s3c64xx: requests spi-dma channel only during data transfer") Cc: stable@vger.kernel.org # 6.0 Cc: Adithya K V Link: https://sashiko.dev/#/patchset/20260410081757.503099-1-johan%40kernel.org Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260410094925.518343-1-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-s3c64xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index aab36c779c06a..33c80daec5f6b 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1402,11 +1402,6 @@ static void s3c64xx_spi_remove(struct platform_device *pdev) writel(0, sdd->regs + S3C64XX_SPI_INT_EN); - if (!is_polling(sdd)) { - dma_release_channel(sdd->rx_dma.ch); - dma_release_channel(sdd->tx_dma.ch); - } - pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); From d71240d2609bebe9e2074c5912a60fe76415d238 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:49:10 +0200 Subject: [PATCH 367/554] staging: vme_user: fix root device leak on init failure commit 32c91e8ee039777d0b95b914633fc6a42607959c upstream. Make sure to deregister and free the root device in case module initialisation fails. 
Fixes: 658bcdae9c67 ("vme: Adding Fake VME driver") Cc: stable@vger.kernel.org # 4.9 Cc: Martyn Welch Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260424104910.2619349-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme_fake.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index 731fbba17dfd6..c6a7c0c69f16e 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -1230,6 +1230,8 @@ static int __init fake_init(void) err_driver: kfree(fake_bridge); err_struct: + root_device_unregister(vme_root); + return retval; } From 7baa02b0ae9d17ec5f08836d8ea88ce1927d0678 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 10 Apr 2026 16:49:47 +0200 Subject: [PATCH 368/554] fanotify: fix false positive on permission events commit 7746e3bd4cc19b5092e00d32d676e329bfcb6900 upstream. fsnotify_get_mark_safe() may return false for a mark on an unrelated group, which results in bypassing the permission check. Fix by skipping over detached marks that are not in the current group. 
CC: stable@vger.kernel.org Fixes: abc77577a669 ("fsnotify: Provide framework for dropping SRCU lock in ->handle_event") Signed-off-by: Miklos Szeredi Link: https://patch.msgid.link/20260410144950.156160-1-mszeredi@redhat.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/fsnotify.c | 2 +- fs/notify/mark.c | 18 +++++++++++------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 63dd44931989d..cae0adce6d47a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -444,7 +444,7 @@ static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } -static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 55a03bb05aa11..cedd84afbede5 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -453,9 +453,6 @@ EXPORT_SYMBOL_GPL(fsnotify_put_mark); */ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (!mark) - return true; - if (refcount_inc_not_zero(&mark->refcnt)) { spin_lock(&mark->lock); if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { @@ -496,15 +493,22 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) int type; fsnotify_foreach_iter_type(type) { + struct fsnotify_mark *mark = iter_info->marks[type]; + /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->marks[type])) { - __release(&fsnotify_mark_srcu); - goto fail; + while (mark && !fsnotify_get_mark_safe(mark)) { + if (mark->group == iter_info->current_group) { + __release(&fsnotify_mark_srcu); + goto fail; + } + /* This is a mark in an unrelated group, skip */ + mark = fsnotify_next_mark(mark); + iter_info->marks[type] = mark; } } /* - * Now that both marks 
are pinned by refcount in the inode / vfsmount + * Now that all marks are pinned by refcount in the inode / vfsmount / etc * lists, we can drop SRCU lock, and safely resume the list iteration * once userspace returns. */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0d954ea7b1796..4affacb909ae2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -912,6 +912,7 @@ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); From 83b131a214f1fd112bc3d42f3b7ca1419b963759 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:06 +0100 Subject: [PATCH 369/554] KVM: arm64: Fix kvm_vcpu_initialized() macro parameter commit d89fdda7dd8a488f922e1175e6782f781ba8a23b upstream. The macro is defined with parameter 'v' but the body references the literal token 'vcpu' instead, causing it to silently operate on whatever 'vcpu' resolves to in the caller's scope rather than the value passed by the caller. All current call sites happen to use a variable named 'vcpu', so the bug is latent. 
Fixes: e016333745c7 ("KVM: arm64: Only reset vCPU-scoped feature ID regs once") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-5-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 64302c438355c..7bee8a357ceb0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1476,7 +1476,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) #define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) -#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) +#define kvm_vcpu_initialized(v) vcpu_get_flag(v, VCPU_INITIALIZED) int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM From 34bdcfb496b29f9a52431194f94473b37fb8c162 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 17 Apr 2026 15:24:39 +0000 Subject: [PATCH 370/554] mtd: spi-nor: debugfs: fix out-of-bounds read in spi_nor_params_show() commit e47029b977e747cb3a9174308fd55762cce70147 upstream. Sashiko noticed an out-of-bounds read [1]. In spi_nor_params_show(), the snor_f_names array is passed to spi_nor_print_flags() using sizeof(snor_f_names). Since snor_f_names is an array of pointers, sizeof() returns the total number of bytes occupied by the pointers (element_count * sizeof(void *)) rather than the element count itself. On 64-bit systems, this makes the passed length 8x larger than intended. Inside spi_nor_print_flags(), the 'names_len' argument is used to bounds-check the 'names' array access. An out-of-bounds read occurs if a flag bit is set that exceeds the array's actual element count but is within the inflated byte-size count. 
Correct this by using ARRAY_SIZE() to pass the actual number of string pointers in the array. Cc: stable@vger.kernel.org Fixes: 0257be79fc4a ("mtd: spi-nor: expose internal parameters via debugfs") Closes: https://sashiko.dev/#/patchset/20260417-die-erase-fix-v2-1-73bb7004ebad%40infineon.com [1] Signed-off-by: Tudor Ambarus Reviewed-by: Takahiro Kuwano Reviewed-by: Michael Walle Reviewed-by: Pratyush Yadav Signed-off-by: Miquel Raynal Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c index fa6956144d2e4..14ba1680c3154 100644 --- a/drivers/mtd/spi-nor/debugfs.c +++ b/drivers/mtd/spi-nor/debugfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -92,7 +93,8 @@ static int spi_nor_params_show(struct seq_file *s, void *data) seq_printf(s, "address nbytes\t%u\n", nor->addr_nbytes); seq_puts(s, "flags\t\t"); - spi_nor_print_flags(s, nor->flags, snor_f_names, sizeof(snor_f_names)); + spi_nor_print_flags(s, nor->flags, snor_f_names, + ARRAY_SIZE(snor_f_names)); seq_puts(s, "\n"); seq_puts(s, "\nopcodes\n"); From 10c252577ccdb0f05bf4cb1e3972e9613db69895 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Mon, 27 Apr 2026 13:03:33 +0100 Subject: [PATCH 371/554] arm64: signal: Preserve POR_EL0 if poe_context is missing commit 030e8a40fff65ca6ac1c04a4d3c08afe72438922 upstream. Commit 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to avoid uaccess failures") delayed the write to POR_EL0 in rt_sigreturn to avoid spurious uaccess failures. This change however relies on the poe_context frame record being present: on a system supporting POE, calling sigreturn without a poe_context record now results in writing arbitrary data from the kernel stack into POR_EL0. Fix this by adding a __valid_fields member to struct user_access_state, and zeroing the struct on allocation. 
restore_poe_context() then indicates that the por_el0 field is valid by setting the corresponding bit in __valid_fields, and restore_user_access_state() only touches POR_EL0 if there is a valid value to set it to. This is in line with how POR_EL0 was originally handled; all frame records are currently optional, except fpsimd_context. To ensure that __valid_fields is kept in sync, fields (currently just por_el0) are now accessed via accessors and prefixed with __ to discourage direct access. Fixes: 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to avoid uaccess failures") Cc: Reported-by: Will Deacon Signed-off-by: Kevin Brodsky Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal.c | 54 ++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 08ffc5a5aea4c..38e6fa204c17b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -67,6 +67,9 @@ struct rt_sigframe_user_layout { unsigned long end_offset; }; +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + /* * Holds any EL0-controlled state that influences unprivileged memory accesses. * This includes both accesses done in userspace and uaccess done in the kernel. @@ -74,13 +77,35 @@ struct rt_sigframe_user_layout { * This state needs to be carefully managed to ensure that it doesn't cause * uaccess to fail when setting up the signal frame, and the signal handler * itself also expects a well-defined state when entered. + * + * The struct should be zero-initialised. Its members should only be accessed + * via the accessors below. __valid_fields tracks which of the fields are valid + * (have been set to some value). 
*/ struct user_access_state { - u64 por_el0; + unsigned int __valid_fields; + u64 __por_el0; }; -#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) -#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) +#define UA_STATE_HAS_POR_EL0 BIT(0) + +static void set_ua_state_por_el0(struct user_access_state *ua_state, + u64 por_el0) +{ + ua_state->__por_el0 = por_el0; + ua_state->__valid_fields |= UA_STATE_HAS_POR_EL0; +} + +static int get_ua_state_por_el0(const struct user_access_state *ua_state, + u64 *por_el0) +{ + if (ua_state->__valid_fields & UA_STATE_HAS_POR_EL0) { + *por_el0 = ua_state->__por_el0; + return 0; + } + + return -ENOENT; +} /* * Save the user access state into ua_state and reset it to disable any @@ -94,7 +119,7 @@ static void save_reset_user_access_state(struct user_access_state *ua_state) for (int pkey = 0; pkey < arch_max_pkey(); pkey++) por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX); - ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); + set_ua_state_por_el0(ua_state, read_sysreg_s(SYS_POR_EL0)); write_sysreg_s(por_enable_all, SYS_POR_EL0); /* * No ISB required as we can tolerate spurious Overlay faults - @@ -122,8 +147,10 @@ static void set_handler_user_access_state(void) */ static void restore_user_access_state(const struct user_access_state *ua_state) { - if (system_supports_poe()) - write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); + u64 por_el0; + + if (get_ua_state_por_el0(ua_state, &por_el0) == 0) + write_sysreg_s(por_el0, SYS_POR_EL0); } static void init_user_layout(struct rt_sigframe_user_layout *user) @@ -333,11 +360,16 @@ static int restore_fpmr_context(struct user_ctxs *user) static int preserve_poe_context(struct poe_context __user *ctx, const struct user_access_state *ua_state) { - int err = 0; + int err; + u64 por_el0; + + err = get_ua_state_por_el0(ua_state, &por_el0); + if (WARN_ON_ONCE(err)) + return err; __put_user_error(POE_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), 
&ctx->head.size, err); - __put_user_error(ua_state->por_el0, &ctx->por_el0, err); + __put_user_error(por_el0, &ctx->por_el0, err); return err; } @@ -353,7 +385,7 @@ static int restore_poe_context(struct user_ctxs *user, __get_user_error(por_el0, &(user->poe->por_el0), err); if (!err) - ua_state->por_el0 = por_el0; + set_ua_state_por_el0(ua_state, por_el0); return err; } @@ -1095,7 +1127,7 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; - struct user_access_state ua_state; + struct user_access_state ua_state = {}; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -1507,7 +1539,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, { struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; - struct user_access_state ua_state; + struct user_access_state ua_state = {}; int err = 0; fpsimd_save_and_flush_current_state(); From 8f75597dfc67bd8c6b9c0ea9fc42abb28e3f6b04 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Wed, 22 Apr 2026 23:33:53 +0900 Subject: [PATCH 372/554] mm/hugetlb_cma: round up per_node before logging it commit 8f5ce56b76303c55b78a87af996e2e0f8535f979 upstream. When the user requests a total hugetlb CMA size without per-node specification, hugetlb_cma_reserve() computes per_node from hugetlb_cma_size and the number of nodes that have memory per_node = DIV_ROUND_UP(hugetlb_cma_size, nodes_weight(hugetlb_bootmem_nodes)); The reservation loop later computes size = round_up(min(per_node, hugetlb_cma_size - reserved), PAGE_SIZE << order); So the actually reserved per_node size is multiple of (PAGE_SIZE << order), but the logged per_node is not rounded up, so it may be smaller than the actual reserved size. 
For example, as the existing comment describes, if a 3 GB area is requested on a machine with 4 NUMA nodes that have memory, 1 GB is allocated on the first three nodes, but the printed log is hugetlb_cma: reserve 3072 MiB, up to 768 MiB per node Round per_node up to (PAGE_SIZE << order) before logging so that the printed log always matches the actual reserved size. No functional change to the actual reservation size, as the following case analysis shows 1. remaining (hugetlb_cma_size - reserved) >= rounded per_node - AS-IS: min() picks unrounded per_node; round_up() returns rounded per_node - TO-BE: min() picks rounded per_node; round_up() returns rounded per_node (no-op) 2. remaining < unrounded per_node - AS-IS: min() picks remaining; round_up() returns round_up(remaining) - TO-BE: min() picks remaining; round_up() returns round_up(remaining) 3. unrounded per_node <= remaining < rounded per_node - AS-IS: min() picks unrounded per_node; round_up() returns rounded per_node - TO-BE: min() picks remaining; round_up() returns round_up(remaining) equals rounded per_node Link: https://lore.kernel.org/20260422143353.852257-1-ekffu200098@gmail.com Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") # 5.7 Signed-off-by: Sang-Heon Jeon Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb_cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c index e8e4dc7182d54..603927d881522 100644 --- a/mm/hugetlb_cma.c +++ b/mm/hugetlb_cma.c @@ -193,6 +193,7 @@ void __init hugetlb_cma_reserve(int order) */ per_node = DIV_ROUND_UP(hugetlb_cma_size, nodes_weight(hugetlb_bootmem_nodes)); + per_node = round_up(per_node, PAGE_SIZE << order); pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", hugetlb_cma_size / SZ_1M, per_node / SZ_1M); } From 52070a91956fe758e33a029ea8307bb71023a214 Mon Sep 17 00:00:00 2001 From: 
Huacai Chen Date: Mon, 4 May 2026 09:00:01 +0800 Subject: [PATCH 373/554] LoongArch: Fix SYM_SIGFUNC_START definition for 32BIT commit 98b8aebb14fdc0133939fd8fe07d0d98333dc976 upstream. The SYM_SIGFUNC_START definition should match sigcontext that the length of GPRs are 8 bytes for both 32BIT and 64BIT. So replace SZREG with 8 to fix it. Cc: stable@vger.kernel.org Fixes: e4878c37f6679fde ("LoongArch: vDSO: Emit GNU_EH_FRAME correctly") Suggested-by: Xi Ruoyao Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/linkage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index a1bd6a3ee03a1..ae937d1708b24 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -69,7 +69,7 @@ 9, 10, 11, 12, 13, 14, 15, 16, \ 17, 18, 19, 20, 21, 22, 23, 24, \ 25, 26, 27, 28, 29, 30, 31; \ - .cfi_offset \num, SC_REGS + \num * SZREG; \ + .cfi_offset \num, SC_REGS + \num * 8; \ .endr; \ \ nop; \ From fbe0e6197225e6a83cf113a67a4b425f8de0bcd5 Mon Sep 17 00:00:00 2001 From: Kai Zen Date: Thu, 30 Apr 2026 18:26:48 +0300 Subject: [PATCH 374/554] net: rtnetlink: zero ifla_vf_broadcast to avoid stack infoleak in rtnl_fill_vfinfo commit 4b9e327991815e128ad3af75c3a04630a63ce3e0 upstream. rtnl_fill_vfinfo() declares struct ifla_vf_broadcast on the stack without initialisation: struct ifla_vf_broadcast vf_broadcast; The struct contains a single fixed 32-byte field: /* include/uapi/linux/if_link.h */ struct ifla_vf_broadcast { __u8 broadcast[32]; }; The function then copies dev->broadcast into it using dev->addr_len as the length: memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); On Ethernet devices (the overwhelming majority of SR-IOV NICs) dev->addr_len is 6, so only the first 6 bytes of broadcast[] are written. The remaining 26 bytes retain whatever was previously on the kernel stack. 
The full struct is then handed to userspace via: nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) leaking up to 26 bytes of uninitialised kernel stack per VF per RTM_GETLINK request, repeatable. The other vf_* structs in the same function are explicitly zeroed for exactly this reason - see the memset() calls for ivi, vf_vlan_info, node_guid and port_guid a few lines above. vf_broadcast was simply missed when it was added. Reachability: any unprivileged local process can open AF_NETLINK / NETLINK_ROUTE without capabilities and send RTM_GETLINK with an IFLA_EXT_MASK attribute carrying RTEXT_FILTER_VF. The kernel walks each VF and emits IFLA_VF_BROADCAST, leaking 26 bytes of stack per VF per request. Stack residue at this call site can include return addresses and transient sensitive data; KASAN with stack instrumentation, or KMSAN, will flag the nla_put() when reproduced. Zero the on-stack struct before the partial memcpy, matching the existing pattern used for the other vf_* structs in the same function. 
Fixes: 75345f888f70 ("ipoib: show VF broadcast address") Cc: stable@vger.kernel.org Signed-off-by: Kai Zen Link: https://patch.msgid.link/3c506e8f936e52b57620269b55c348af05d413a2.1777557228.git.kai.aizen.dev@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f4ed60bd9a256..0a43c3881e3f8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1566,6 +1566,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memset(&vf_broadcast, 0, sizeof(vf_broadcast)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; From 4bf22a0afb27c7fdce2230d0cf30eee57cc1c883 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:55 +0200 Subject: [PATCH 375/554] mptcp: pm: ADD_ADDR rtx: skip inactive subflows commit c6d395e2de1306b5fef0344a3c3835fbbfaa18be upstream. When looking at the maximum RTO amongst the subflows, inactive subflows were taken into account: that includes stale ones, and the initial one if it has been already been closed. Unusable subflows are now simply skipped. Stale ones are used as an alternative: if there are only stale ones, to take their maximum RTO and avoid to eventually fallback to net.mptcp.add_addr_timeout, which is set to 2 minutes by default. 
Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-7-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index e22d53af0ffa0..ec1c570bc4525 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -305,18 +305,28 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) const struct net *net = sock_net((struct sock *)msk); unsigned int rto = mptcp_get_add_addr_timeout(net); struct mptcp_subflow_context *subflow; - unsigned int max = 0; + unsigned int max = 0, max_stale = 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); - if (icsk->icsk_rto > max) + if (!__mptcp_subflow_active(subflow)) + continue; + + if (unlikely(subflow->stale)) { + if (icsk->icsk_rto > max_stale) + max_stale = icsk->icsk_rto; + } else if (icsk->icsk_rto > max) { max = icsk->icsk_rto; + } } - if (max && max < rto) - rto = max; + if (max) + return min(max, rto); + + if (max_stale) + return min(max_stale, rto); return rto; } From aab56b95bee3ff79176b13443cd9d7cfe9747df0 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:54 +0800 Subject: [PATCH 376/554] perf/x86/intel: Improve validation and configuration of ACR masks commit 5ad732a56be46aabf158c16aa0c095291727aaef upstream. Currently there are several issues on the user space ACR mask validation and configuration. - The validation for user space ACR mask (attr.config2) is incomplete, e.g., the ACR mask could include the index which belongs to another ACR events group, but it's not validated. 
- An early return on an invalid ACR mask caused all subsequent ACR groups to be skipped. - The stale hardware ACR mask (hw.config1) is not cleared before setting new hardware ACR mask. The following changes address all of the above issues. - Figure out the event index group of an ACR group. Any bits in the user-space mask not present in the index group are now dropped. - Instead of an early return on invalid bits, drop only the invalid portions and continue iterating through all ACR events to ensure full configuration. - Explicitly clear the stale hardware ACR mask for each event prior to writing the new configuration. Besides, a non-leader event member of ACR group could be disabled in theory. This could cause bit-shifting errors in the acr_mask of remaining group members. But since ACR sampling requires all events to be active, this should not be a big concern in real use case. Add a "FIXME" comment to notice this risk. Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-2-dapeng1.mi@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3046058e7e238..7d8c7eb7838da 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2980,23 +2980,41 @@ static void intel_pmu_enable_event(struct perf_event *event) static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) { struct perf_event *event, *leader; - int i, j, idx; + int i, j, k, bit, idx; + /* + * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only). + * Disabling an ACR event causes bit-shifting errors in the acr_mask of + * remaining group members. 
As ACR sampling requires all events to be active, + * this limitation is acceptable for now. Revisit if independent event toggling + * is required. + */ for (i = 0; i < cpuc->n_events; i++) { leader = cpuc->event_list[i]; if (!is_acr_event_group(leader)) continue; - /* The ACR events must be contiguous. */ + /* Find the last event of the ACR group. */ for (j = i; j < cpuc->n_events; j++) { event = cpuc->event_list[j]; if (event->group_leader != leader->group_leader) break; - for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { - if (i + idx >= cpuc->n_events || - !is_acr_event_group(cpuc->event_list[i + idx])) - return; - __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + + /* + * Translate the user-space ACR mask (attr.config2) into the physical + * counter bitmask (hw.config1) for each ACR event in the group. + * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config(). + */ + for (k = i; k < j; k++) { + event = cpuc->event_list[k]; + event->hw.config1 = 0; + for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + idx = i + bit; + /* Event index of ACR group must locate in [i, j). */ + if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) + continue; + __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); } } i = j - 1; From 0ff2b713f406e9ecadb406014d74e7a020ac12b1 Mon Sep 17 00:00:00 2001 From: SeungJu Cheon Date: Sun, 26 Apr 2026 20:12:39 +0900 Subject: [PATCH 377/554] sound: ua101: fix division by zero at probe commit d1f73f169c1014463b5060e3f60813e13ddc7b87 upstream. Add a missing sanity check for bNrChannels in detect_usb_format() to prevent a division by zero in playback_urb_complete() and capture_urb_complete(). USB core does not validate class-specific descriptor fields such as bNrChannels, so drivers must verify them before use. 
If a device provides bNrChannels = 0, frame_bytes becomes zero and is later used as a divisor in the URB completion handlers, leading to a kernel crash. Fixes: 63978ab3e3e9 ("sound: add Edirol UA-101 support") Cc: stable@vger.kernel.org Signed-off-by: SeungJu Cheon Link: https://patch.msgid.link/20260426111239.103296-1-suunj1331@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/misc/ua101.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 76b6eb55dcc22..99e98b5e3241f 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -974,6 +974,13 @@ static int detect_usb_format(struct ua101 *ua) ua->capture.channels = fmt_capture->bNrChannels; ua->playback.channels = fmt_playback->bNrChannels; + if (!ua->capture.channels || !ua->playback.channels) { + dev_err(&ua->dev->dev, + "invalid channel count: capture %u, playback %u\n", + ua->capture.channels, ua->playback.channels); + return -EINVAL; + } + ua->capture.frame_bytes = fmt_capture->bSubframeSize * ua->capture.channels; ua->playback.frame_bytes = From 0479b6e9f999cc1cbad7d9f09f574fc387e605d5 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:41 +0530 Subject: [PATCH 378/554] pseries/papr-hvpipe: Prevent kernel stack memory leak to userspace commit cefeed44296261173a806bef988b26bc565da4be upstream. The hdr variable is allocated on the stack and only hdr.version and hdr.flags are initialized explicitly. Because the struct papr_hvpipe_hdr contains reserved padding bytes (reserved[3] and reserved2[40]), these could leak the uninitialized bytes to userspace after copy_to_user(). This patch fixes that by initializing the whole struct to 0. 
Cc: stable@vger.kernel.org Fixes: cebdb522fd3ed ("powerpc/pseries: Receive payload with ibm,receive-hvpipe-msg RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/7bfe03b65a282c856ed8182d1871bb973c0b78f2.1777606826.git.ritesh.list@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 21a2f447c43fd..a690bb15d4a90 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -327,7 +327,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, { struct hvpipe_source_info *src_info = file->private_data; - struct papr_hvpipe_hdr hdr; + struct papr_hvpipe_hdr hdr = {}; long ret; /* From 2b1335edaac5fec564b37883d5703912d97d8c5d Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:43 +0530 Subject: [PATCH 379/554] pseries/papr-hvpipe: Fix & simplify error handling in papr_hvpipe_init() commit 713e468cdbc2277db6ce949c32c1acbd83501733 upstream. Remove such 3 levels of nesting patterns to check success return values from function calls. ret = enable_hvpipe_IRQ() if (!ret) ret = set_hvpipe_sys_param(1) if (!ret) ret = misc_register() Instead just bail out to "out*:" labels, in case of any error. This simplifies the init flow. While at it let's also fix the following error handling logic: We have already enabled interrupt sources and enabled hvpipe to received interrupts, if misc_register() fails, we will destroy the workqueue, but the HMC might send us a msg via hvpipe which will call, queue work on the workqueue which might be destroyed. So instead, let's reverse the order of enabling set_hvpipe_sys_param(1) and in case of an error let's remove the misc dev by calling misc_deregister(). 
Cc: stable@vger.kernel.org Fixes: 39a08a4f94980 ("powerpc/pseries: Enable hvpipe with ibm,set-system-parameter RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/f2141eafb80e7780395e03aa9a22e8a37be80513.1777606826.git.ritesh.list@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 28 ++++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index a690bb15d4a90..4f938b187ad2e 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -796,23 +796,29 @@ static int __init papr_hvpipe_init(void) } ret = enable_hvpipe_IRQ(); - if (!ret) { - ret = set_hvpipe_sys_param(1); - if (!ret) - ret = misc_register(&papr_hvpipe_dev); - } + if (ret) + goto out_wq; - if (!ret) { - pr_info("hvpipe feature is enabled\n"); - hvpipe_feature = true; - return 0; - } + ret = misc_register(&papr_hvpipe_dev); + if (ret) + goto out_wq; - pr_err("hvpipe feature is not enabled %d\n", ret); + ret = set_hvpipe_sys_param(1); + if (ret) + goto out_misc; + + pr_info("hvpipe feature is enabled\n"); + hvpipe_feature = true; + return 0; + +out_misc: + misc_deregister(&papr_hvpipe_dev); +out_wq: destroy_workqueue(papr_hvpipe_wq); out: kfree(papr_hvpipe_work); papr_hvpipe_work = NULL; + pr_err("hvpipe feature is not enabled %d\n", ret); return ret; } machine_device_initcall(pseries, papr_hvpipe_init); From 62951b49754aceda027e0a017dc9bb52ef5094fd Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:44 +0530 Subject: [PATCH 380/554] pseries/papr-hvpipe: Fix the usage of copy_to_user() commit d48654bd8b1a75f662e224d257db54de475120dc upstream. copy_to_user() return bytes_not_copied to the user buffer. If there was an error writing bytes into the user buffer, i.e. 
if copy_to_user returns a non-zero value, then we should simply return -EFAULT from the ->read() call. Otherwise, in the non-patched version, we may end up mixing "bytes_not_copied + bytes_copied (HVPIPE_HDR_LEN)" as the return value to the user in ->read() call Also let's make sure we clear the hvpipe_status flag, if we have consumed the hvpipe msg by making the rtas call. ret = -EFAULT means copy_to_user has failed but that still means that the msg was read from the hvpipe, hence for both cases, success & -EFAULT, we should clear the HVPIPE_MSG_AVAILABLE flag in hvpipe_status. Cc: stable@vger.kernel.org Fixes: cebdb522fd3edd1 ("powerpc/pseries: Receive payload with ibm,receive-hvpipe-msg RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8fda3212a1ad48879c174e92f67472d9b9f1c3b7.1777606826.git.ritesh.list@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 23 ++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 4f938b187ad2e..431f6a3da4c5a 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -206,10 +206,11 @@ static int hvpipe_rtas_recv_msg(char __user *buf, int size) bytes_written, size); bytes_written = size; } - ret = copy_to_user(buf, + if (copy_to_user(buf, rtas_work_area_raw_buf(work_area), - bytes_written); - if (!ret) + bytes_written)) + ret = -EFAULT; + else ret = bytes_written; } } else { @@ -328,7 +329,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, struct hvpipe_source_info *src_info = file->private_data; struct papr_hvpipe_hdr hdr = {}; - long ret; + ssize_t ret = 0; /* * Return -ENXIO during migration @@ -376,7 +377,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, ret = copy_to_user(buf, &hdr, HVPIPE_HDR_LEN); if (ret) - return ret; + 
return -EFAULT; /* * Message event has payload, so get the payload with @@ -385,19 +386,23 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, if (hdr.flags & HVPIPE_MSG_AVAILABLE) { ret = hvpipe_rtas_recv_msg(buf + HVPIPE_HDR_LEN, size - HVPIPE_HDR_LEN); - if (ret > 0) { + /* + * Always clear MSG_AVAILABLE once the RTAS call has drained + * the message, regardless of whether copy_to_user succeeded. + */ + if (ret >= 0 || ret == -EFAULT) src_info->hvpipe_status &= ~HVPIPE_MSG_AVAILABLE; - ret += HVPIPE_HDR_LEN; - } } else if (hdr.flags & HVPIPE_LOST_CONNECTION) { /* * Hypervisor is closing the pipe for the specific * source. So notify user space. */ src_info->hvpipe_status &= ~HVPIPE_LOST_CONNECTION; - ret = HVPIPE_HDR_LEN; } + if (ret >= 0) + ret += HVPIPE_HDR_LEN; + return ret; } From 33c5bb50b9c40e8451e6aec4487a31d794b98d92 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 29 Apr 2026 16:37:42 +0800 Subject: [PATCH 381/554] net: libwx: fix VF illegal register access commit 694de316f607fe2473d52ca0707e3918e72c1562 upstream. Register WX_CFG_PORT_ST is a PF restricted register. When a VF is initialized, attempting to read this register triggers an illegal register access, which lead to a system hang. When the device is VF, the bus function ID can be obtained directly from the PCI_FUNC(pdev->devfn). 
Fixes: a04ea57aae37 ("net: libwx: fix device bus LAN ID") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/4D1F4452D21DE107+20260429083743.88961-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index b37d6cfbfbe94..be59a10228904 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2427,8 +2427,11 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, - rd32(wx, WX_CFG_PORT_ST)); + if (pdev->is_virtfn) + wx->bus.func = PCI_FUNC(pdev->devfn); + else + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { From e70cfb40c3a99b232cd42c6a6a10f0d8e039dc82 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Thu, 30 Apr 2026 18:33:18 +0800 Subject: [PATCH 382/554] ip6_gre: Use cached t->net in ip6erspan_changelink(). commit 1d324c2f43f70c965f25c58cc3611c779adbe47e upstream. After commit 5e72ce3e3980 ("net: ipv6: Use link netns in newlink() of rtnl_link_ops"), ip6erspan_newlink() correctly resolves the per-netns ip6gre hash via link_net. ip6erspan_changelink() was not converted in that series and still uses dev_net(dev), which diverges from the device's creation netns after IFLA_NET_NS_FD migration. This re-inserts the tunnel into the wrong per-netns hash. The original netns keeps a stale entry. When that netns is later destroyed, ip6gre_exit_rtnl_net() walks the stale entry, producing a slab-use-after-free reported by KASAN, followed by a kernel BUG at net/core/dev.c (LIST_POISON1) in unregister_netdevice_many_notify(). 
Reachable from an unprivileged user namespace (unshare --user --map-root-user --net). ip6gre_changelink() earlier in the same file already uses the cached t->net; only ip6erspan_changelink() has the wrong shape. Fixes: 2d665034f239 ("net: ip6_gre: Fix ip6erspan hlen calculation") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Maoyi Xie Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260430103318.3206018-1-maoyi.xie@ntu.edu.sg Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d19d86ed43766..6dea7caf5a777 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2261,10 +2261,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct ip6_tnl *t; + struct ip6gre_net *ign; + ign = net_generic(t->net, ip6gre_net_id); t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); From 1bca036fe3607182c21f05e2b262167edbef086c Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 29 Apr 2026 16:37:43 +0800 Subject: [PATCH 383/554] net: libwx: use request_irq for VF misc interrupt commit 7a33345153eeeda195c55f15be27074e4c3b5109 upstream. Currently, request_threaded_irq() is used with a primary handler but a NULL threaded handler, while also setting the IRQF_ONESHOT flag. This specific combination triggers a WARNING since the commit aef30c8d569c ("genirq: Warn about using IRQF_ONESHOT without a threaded handler"). 
WARNING: kernel/irq/manage.c:1502 at __setup_irq+0x4fa/0x760 Fix the issue by switching to request_irq(), which is the appropriate interface for a non-threaded interrupt handler, and removing the unnecessary IRQF_ONESHOT flag. Fixes: eb4898fde1de ("net: libwx: add wangxun vf common api") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/786DDC7D5CCA6D0A+20260429083743.88961-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/libwx/wx_vf_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index ade2bfe563aaa..5478f2fdfce88 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -98,8 +98,8 @@ int wx_request_msix_irqs_vf(struct wx *wx) } } - err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf, - NULL, IRQF_ONESHOT, netdev->name, wx); + err = request_irq(wx->msix_entry->vector, wx_msix_misc_vf, + 0, netdev->name, wx); if (err) { wx_err(wx, "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; From 3d47d62c134801eb260c2573ba9e5c4eb0651eb8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 1 May 2026 02:58:41 -0700 Subject: [PATCH 384/554] netpoll: pass buffer size to egress_dev() to avoid MAC truncation commit 76b93a8107574006b25495664304ea9237494d70 upstream. egress_dev() formats np->dev_mac via snprintf() but receives buf as a bare char *, so it cannot derive the buffer size from the pointer. The size argument was hardcoded to MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1 = 17), which is simply wrong in two ways: 1) misleading kernel log output on the MAC-selected target path (np->dev_name[0] == '\0'); for example "aa:bb:cc:dd:ee:ff doesn't exist, aborting" was logged as "aa:bb:cc:dd:ee:f doesn't exist, aborting". 
2) the second argument of snprintf is the size of the buffer, not the size of what you want to write. Add a bufsz parameter to egress_dev() and pass sizeof(buf) from each caller, matching the standard snprintf() idiom and removing the hardcoded size from the helper. Every caller already declares "char buf[MAC_ADDR_STR_LEN + 1]" so the formatted MAC continues to fit. Tested by booting with netconsole=6665@/aa:bb:cc:dd:ee:ff,6666@10.0.0.1/00:11:22:33:44:55 on a kernel without a matching device. Pre-fix dmesg shows "aa:bb:cc:dd:ee:f doesn't exist, aborting"; post-fix shows the full "aa:bb:cc:dd:ee:ff doesn't exist, aborting". Fixes: f8a10bed32f5 ("netconsole: allow selection of egress interface via MAC address") Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260501-netpoll_snprintf_fix-v1-1-84b0566e6597@debian.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/netpoll.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5af14f14a3623..6b1f470264c12 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf - * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. + * is used to format np->dev_mac when np->dev_name is empty; bufsz must + * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address + * and its NUL terminator. 
*/ -static char *egress_dev(struct netpoll *np, char *buf) +static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz) { if (np->dev_name[0]) return np->dev_name; - snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + snprintf(buf, bufsz, "%pM", np->dev_mac); return buf; } @@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EINVAL; } @@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return err; } @@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } @@ -719,7 +721,8 @@ int netpoll_setup(struct netpoll *np) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); + np_err(np, "%s doesn't exist, aborting\n", + egress_dev(np, buf, sizeof(buf))); err = -ENODEV; goto unlock; } @@ -727,14 +730,14 @@ int netpoll_setup(struct netpoll *np) if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = dev_open(ndev, NULL); if (err) { From 
14ef6fd18db2494098b21e0471bf27a1d8e9993e Mon Sep 17 00:00:00 2001 From: Nan Li Date: Fri, 1 May 2026 09:08:44 +0800 Subject: [PATCH 385/554] net/rds: handle zerocopy send cleanup before the message is queued commit 44b550d88b267320459d518c0743a241ab2108fa upstream. A zerocopy send can fail after user pages have been pinned but before the message is attached to the sending socket. The purge path currently infers zerocopy state from rm->m_rs, so an unqueued message can be cleaned up as if it owned normal payload pages. However, zerocopy ownership is really determined by the presence of op_mmp_znotifier, regardless of whether the message has reached the socket queue. Capture op_mmp_znotifier up front in rds_message_purge() and use it as the cleanup discriminator. If the message is already associated with a socket, keep the existing completion path. Otherwise, drop the pinned page accounting directly and release the notifier before putting the payload pages. This keeps early send failure cleanup consistent with the zerocopy lifetime rules without changing the normal queued completion path. 
Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Co-developed-by: Xiao Liu Signed-off-by: Xiao Liu Signed-off-by: Nan Li Signed-off-by: Ren Wei Reviewed-by: Allison Henderson Link: https://patch.msgid.link/d2ea98a6313d5467bac00f7c9fef8c7acddb9258.1777550074.git.tonanli66@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/rds/message.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 199a899a43e9c..bdf5af2547577 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -129,24 +129,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, */ static void rds_message_purge(struct rds_message *rm) { + struct rds_znotifier *znotifier; unsigned long i, flags; - bool zcopy = false; + bool zcopy; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); + znotifier = rm->data.op_mmp_znotifier; + rm->data.op_mmp_znotifier = NULL; + zcopy = !!znotifier; + if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; - if (rm->data.op_mmp_znotifier) { - zcopy = true; - rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + if (znotifier) { + rds_rm_zerocopy_callback(rs, znotifier); rds_wake_sk_sleep(rs); - rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; + } else if (znotifier) { + /* + * Zerocopy can fail before the message is queued on the + * socket, so there is no rs to carry the notification. 
+ */ + mm_unaccount_pinned_pages(&znotifier->z_mmp); + kfree(rds_info_from_znotifier(znotifier)); } spin_unlock_irqrestore(&rm->m_rs_lock, flags); From 2b56d7903ab804481f5233a259d5f341e9fd513c Mon Sep 17 00:00:00 2001 From: Pavitra Jha Date: Fri, 1 May 2026 07:07:12 -0400 Subject: [PATCH 386/554] net: wwan: t7xx: validate port_count against message length in t7xx_port_enum_msg_handler commit 0e7c074cfcd9bd93765505f9eb8b42f03ed2a744 upstream. t7xx_port_enum_msg_handler() uses the modem-supplied port_count field as a loop bound over port_msg->data[] without checking that the message buffer contains sufficient data. A modem sending port_count=65535 in a 12-byte buffer triggers a slab-out-of-bounds read of up to 262140 bytes. Add a sizeof(*port_msg) check before accessing the port message header fields to guard against undersized messages. Add a struct_size() check after extracting port_count and before the loop. In t7xx_parse_host_rt_data(), guard the rt_feature header read with a remaining-buffer check before accessing data_len, validate feat_data_len against the actual remaining buffer to prevent OOB reads and signed integer overflow on offset. Pass msg_len from both call sites: skb->len at the DPMAIF path after skb_pull(), and the validated feat_data_len at the handshake path. 
Fixes: da45d2566a1d ("net: wwan: t7xx: Add control port") Cc: stable@vger.kernel.org Signed-off-by: Pavitra Jha Link: https://patch.msgid.link/20260501110713.145563-1-jhapavitra98@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/wwan/t7xx/t7xx_modem_ops.c | 20 +++++++++++++++++--- drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c | 18 ++++++++++++++++-- drivers/net/wwan/t7xx/t7xx_port_proxy.h | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 7968e208dd37c..adb29d30c63fe 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -457,8 +457,20 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf offset = sizeof(struct feature_query); for (i = 0; i < FEATURE_COUNT && offset < data_length; i++) { + size_t remaining = data_length - offset; + size_t feat_data_len, feat_total; + + if (remaining < sizeof(*rt_feature)) + break; + rt_feature = data + offset; - offset += sizeof(*rt_feature) + le32_to_cpu(rt_feature->data_len); + feat_data_len = le32_to_cpu(rt_feature->data_len); + + if (feat_data_len > remaining - sizeof(*rt_feature)) + break; + + feat_total = sizeof(*rt_feature) + feat_data_len; + offset += feat_total; ft_spt_cfg = FIELD_GET(FEATURE_MSK, core->feature_set[i]); if (ft_spt_cfg != MTK_FEATURE_MUST_BE_SUPPORTED) @@ -468,8 +480,10 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED) return -EINVAL; - if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) - t7xx_port_enum_msg_handler(ctl->md, rt_feature->data); + if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) { + t7xx_port_enum_msg_handler(ctl->md, rt_feature->data, + feat_data_len); + } } return 0; diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c index 
ae632ef966983..f869e4ed9ee9a 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c +++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c @@ -117,6 +117,7 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * t7xx_port_enum_msg_handler() - Parse the port enumeration message to create/remove nodes. * @md: Modem context. * @msg: Message. + * @msg_len: Length of @msg in bytes. * * Used to control create/remove device node. * @@ -124,12 +125,18 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * * 0 - Success. * * -EFAULT - Message check failure. */ -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len) { struct device *dev = &md->t7xx_dev->pdev->dev; unsigned int version, port_count, i; struct port_msg *port_msg = msg; + if (msg_len < sizeof(*port_msg)) { + dev_err(dev, "Port enum msg too short for header: need %zu, have %zu\n", + sizeof(*port_msg), msg_len); + return -EINVAL; + } + version = FIELD_GET(PORT_MSG_VERSION, le32_to_cpu(port_msg->info)); if (version != PORT_ENUM_VER || le32_to_cpu(port_msg->head_pattern) != PORT_ENUM_HEAD_PATTERN || @@ -141,6 +148,13 @@ int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) } port_count = FIELD_GET(PORT_MSG_PRT_CNT, le32_to_cpu(port_msg->info)); + + if (msg_len < struct_size(port_msg, data, port_count)) { + dev_err(dev, "Port enum msg too short: need %zu, have %zu\n", + struct_size(port_msg, data, port_count), msg_len); + return -EINVAL; + } + for (i = 0; i < port_count; i++) { u32 port_info = le32_to_cpu(port_msg->data[i]); unsigned int ch_id; @@ -191,7 +205,7 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb) case CTL_ID_PORT_ENUM: skb_pull(skb, sizeof(*ctrl_msg_h)); - ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data); + ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data, skb->len); if 
(!ret) ret = port_ctl_send_msg_to_md(port, CTL_ID_PORT_ENUM, 0); else diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h index f0918b36e899b..7c3190bf0fcf3 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h +++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h @@ -103,7 +103,7 @@ void t7xx_port_proxy_reset(struct port_proxy *port_prox); void t7xx_port_proxy_uninit(struct port_proxy *port_prox); int t7xx_port_proxy_init(struct t7xx_modem *md); void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int state); -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg); +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len); int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id, bool en_flag); void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id); From 23ae72e8c2f1c1d1da8cbd479320ddcfcc9c7435 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 5 May 2026 05:34:03 +0000 Subject: [PATCH 387/554] platform/chrome: cros_ec_typec: Init mutex in Thunderbolt registration commit 525cb7ba6661074c1c5cc3772bccc6afab6791ef upstream. cros_typec_register_thunderbolt() missed initializing the `adata->lock` mutex. This leads to a NULL dereference when the mutex is later acquired (e.g. in cros_typec_altmode_work()). Initialize the mutex in cros_typec_register_thunderbolt() to fix the issue. 
Cc: stable@vger.kernel.org Fixes: 3b00be26b16a ("platform/chrome: cros_ec_typec: Thunderbolt support") Reviewed-by: Benson Leung Reviewed-by: Abhishek Pandit-Subedi Link: https://lore.kernel.org/r/20260505053403.3335740-1-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih Signed-off-by: Greg Kroah-Hartman --- drivers/platform/chrome/cros_typec_altmode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/chrome/cros_typec_altmode.c b/drivers/platform/chrome/cros_typec_altmode.c index 557340b53af03..66c546bf89b53 100644 --- a/drivers/platform/chrome/cros_typec_altmode.c +++ b/drivers/platform/chrome/cros_typec_altmode.c @@ -359,6 +359,7 @@ cros_typec_register_thunderbolt(struct cros_typec_port *port, } INIT_WORK(&adata->work, cros_typec_altmode_work); + mutex_init(&adata->lock); adata->alt = alt; adata->port = port; adata->ap_mode_entry = true; From c9b620425fd03a1bbe922bcacf4a257ab4149c0f Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Sun, 3 May 2026 12:17:44 +0800 Subject: [PATCH 388/554] parisc: Fix IRQ leak in LASI driver commit 37b0dc5e279f35036fb638d1e187197b6c05a76d upstream. When request_irq() succeeds but gsc_common_setup() fails later, the IRQ is never released. Fix this by adding proper error handling with goto labels to ensure resources are released in LIFO order. Detected by Smatch: drivers/parisc/lasi.c:216 lasi_init_chip() warn: 'lasi->gsc_irq.irq' from request_irq() not released on lines: 207. 
Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604180957.4QdAIxP6-lkp@intel.com/ Signed-off-by: Hongling Zeng Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/lasi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index 73c93e9cfa512..86ef05fba2170 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -193,8 +193,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { - kfree(lasi); - return ret; + goto err_free; } /* enable IRQ's for devices below LASI */ @@ -203,8 +202,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) /* Done init'ing, register this driver */ ret = gsc_common_setup(dev, lasi); if (ret) { - kfree(lasi); - return ret; + goto err_irq; } gsc_fixup_irqs(dev, lasi, lasi_choose_irq); @@ -214,6 +212,12 @@ static int __init lasi_init_chip(struct parisc_device *dev) SYS_OFF_PRIO_DEFAULT, lasi_power_off, lasi); return ret; + +err_irq: + free_irq(lasi->gsc_irq.irq, lasi); +err_free: + kfree(lasi); + return ret; } static struct parisc_device_id lasi_tbl[] __initdata = { From 22b365ba1af3d8c6036b8e5112fffe80998b85a0 Mon Sep 17 00:00:00 2001 From: Ivan Hu Date: Thu, 30 Apr 2026 15:41:07 +0800 Subject: [PATCH 389/554] x86/efi: Fix graceful fault handling after FPU softirq changes commit 088f65e206087bf903743bd18417261d7a4c9644 upstream. Since commit d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs"), kernel_fpu_begin() calls fpregs_lock() which uses local_bh_disable() instead of the previous preempt_disable(). This sets SOFTIRQ_OFFSET in preempt_count during the entire EFI runtime service call, causing in_interrupt() to return true in normal task context. 
The graceful page fault handler efi_crash_gracefully_on_page_fault() uses in_interrupt() to bail out for faults in real interrupt context. With SOFTIRQ_OFFSET now set, the handler always bails out, leaving EFI firmware page faults unhandled. This escalates to die() which also sees in_interrupt() as true and calls panic("Fatal exception in interrupt"), resulting in a hard system freeze. On systems with buggy firmware that triggers page faults during EFI runtime calls (e.g., accessing unmapped memory in GetTime()), this causes an unrecoverable hang instead of the expected graceful EFI_ABORTED recovery. Fix by replacing in_interrupt() with !in_task(). This preserves the original intent of bailing for interrupts or NMI faults, while no longer falsely triggering from the FPU code path's local_bh_disable(). Fixes: d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") Cc: Signed-off-by: Ivan Hu [ardb: Sashiko spotted that using 'in_hardirq() || in_nmi()' leaves a window where a softirq may be taken before fpregs_lock() is called, but after efi_rts_work.efi_rts_id has been assigned, and any page faults occurring in that window will then be misidentified as having been caused by the firmware. Instead, use !in_task(), which incorporates in_serving_softirq(). ] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index aa6b26c9cb308..1f234c33c85a7 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -771,7 +771,7 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, * If we get an interrupt/NMI while processing an EFI runtime service * then this is a regular OOPS, not an EFI failure. 
*/ - if (in_interrupt()) + if (!in_task()) return; /* From 357ef14bfde2adb4ea43d94410ce43912c2abddc Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 16 Apr 2026 21:59:30 +0000 Subject: [PATCH 390/554] hwmon: (ltc2992) Clamp threshold writes to hardware range commit d6cc7c99bf1f73eda7d565d224d791d16239bb41 upstream. ltc2992_set_voltage(), ltc2992_set_current(), and ltc2992_set_power() do not validate the user-supplied value before converting it to a register value. This can result in: 1. Negative input values wrapping to large positive register values. For power, the negative long is implicitly cast to u64 in mul_u64_u32_div(), producing an incorrect value. For voltage and current, the negative converted value wraps when passed to ltc2992_write_reg() as a u32. 2. Intermediate arithmetic exceeding the range representable in u64 on 64-bit platforms. In ltc2992_set_voltage(), (u64)val * 1000 can exceed U64_MAX when val is a large positive long. In ltc2992_set_current(), (u64)val * r_sense_uohm can overflow similarly. In ltc2992_set_power(), the computed value may not fit in u64. 3. Register values exceeding the hardware field width. Voltage and current threshold registers are 12-bit (stored left-justified in 16 bits), and power threshold registers are 24-bit. Without clamping, bits above the field width are truncated in ltc2992_write_reg(). Fix by clamping negative values to zero, clamping positive values to the rounded hardware-representable maximum (the value returned by the read path for a full-scale register) to prevent intermediate overflow, and clamping the converted register value to the hardware field width before writing. The existing conversion formula and rounding behavior are preserved. In the power write path, cancel the factor of 1000 from both the numerator (r_sense_uohm * 1000) and the denominator (VADC_UV_LSB * IADC_NANOV_LSB) to also eliminate a u32 overflow of r_sense_uohm * 1000 when r_sense_uohm exceeds about 4.29 ohms. 
Fixes: b0bd407e94b03 ("hwmon: (ltc2992) Add support") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260416215904.101969-2-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/ltc2992.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 1fcd320d61619..1069736196763 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -431,10 +431,16 @@ static int ltc2992_get_voltage(struct ltc2992_state *st, u32 reg, u32 scale, lon static int ltc2992_set_voltage(struct ltc2992_state *st, u32 reg, u32 scale, long val) { - val = DIV_ROUND_CLOSEST(val * 1000, scale); - val = val << 4; + u32 reg_val; + long vmax; + + vmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * scale, 1000); + val = max(val, 0L); + val = min(val, vmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * 1000, scale), + 0xFFFULL) << 4; - return ltc2992_write_reg(st, reg, 2, val); + return ltc2992_write_reg(st, reg, 2, reg_val); } static int ltc2992_read_gpio_alarm(struct ltc2992_state *st, int nr_gpio, u32 attr, long *val) @@ -559,9 +565,15 @@ static int ltc2992_get_current(struct ltc2992_state *st, u32 reg, u32 channel, l static int ltc2992_set_current(struct ltc2992_state *st, u32 reg, u32 channel, long val) { u32 reg_val; + long cmax; - reg_val = DIV_ROUND_CLOSEST(val * st->r_sense_uohm[channel], LTC2992_IADC_NANOV_LSB); - reg_val = reg_val << 4; + cmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + val = max(val, 0L); + val = min(val, cmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * st->r_sense_uohm[channel], + LTC2992_IADC_NANOV_LSB), + 0xFFFULL) << 4; return ltc2992_write_reg(st, reg, 2, reg_val); } @@ -634,9 +646,18 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon static int ltc2992_set_power(struct ltc2992_state *st, 
u32 reg, u32 channel, long val) { u32 reg_val; - - reg_val = mul_u64_u32_div(val, st->r_sense_uohm[channel] * 1000, - LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB); + u64 pmax, uval; + + uval = max(val, 0L); + pmax = mul_u64_u32_div(0xFFFFFFULL, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + uval = min(uval, pmax); + reg_val = min(mul_u64_u32_div(uval, st->r_sense_uohm[channel], + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB), + 0xFFFFFFULL); return ltc2992_write_reg(st, reg, 3, reg_val); } From 13a0f0204d54e038eb3c4e59842e6309256e575f Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 16 Apr 2026 21:59:40 +0000 Subject: [PATCH 391/554] hwmon: (ltc2992) Fix u32 overflow in power read path commit 2da0c1fd01dbd6b22844e8676585153dfc660cbe upstream. ltc2992_get_power() computes the divisor for mul_u64_u32_div() as r_sense_uohm * 1000. This multiplication overflows u32 when r_sense_uohm exceeds about 4.29 ohms (4294967 micro-ohms), producing a truncated divisor and an incorrect power reading. Cancel the factor of 1000 from both the numerator (VADC_UV_LSB * IADC_NANOV_LSB = 312500000) and the divisor (r_sense_uohm * 1000), giving (VADC_UV_LSB / 1000) * IADC_NANOV_LSB = 312500 as the numerator and plain r_sense_uohm as the divisor. The cancellation is exact because LTC2992_VADC_UV_LSB (25000) is divisible by 1000. This is the read-path counterpart of the write-path fix applied in the preceding patch. 
Fixes: b0bd407e94b03 ("hwmon: (ltc2992) Add support") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260416215904.101969-3-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/ltc2992.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 1069736196763..2617c4538af91 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -637,8 +637,10 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon if (reg_val < 0) return reg_val; - *val = mul_u64_u32_div(reg_val, LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB, - st->r_sense_uohm[channel] * 1000); + *val = mul_u64_u32_div(reg_val, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); return 0; } From 370d4deba194ae37148a7304b01b338036df32ca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 11:50:27 +0200 Subject: [PATCH 392/554] clk: rk808: fix OF node reference imbalance commit de019f203b0d472c98ead4081ad4f05d92c9b826 upstream. The driver reuses the OF node of the parent multi-function device but fails to take another reference to balance the one dropped by the platform bus code when unbinding the MFD and deregistering the child devices. Fix this by using the intended helper for reusing OF nodes. 
Fixes: 2dc51ca822e4 ("clk: RK808: Reduce 'struct rk808' usage") Cc: stable@vger.kernel.org # 6.5 Cc: Sebastian Reichel Signed-off-by: Johan Hovold Reviewed-by: Sebastian Reichel Reviewed-by: Brian Masney Reviewed-by: Heiko Stuebner Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-rk808.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c index f7412b137e5ef..5a75b5c915551 100644 --- a/drivers/clk/clk-rk808.c +++ b/drivers/clk/clk-rk808.c @@ -153,7 +153,7 @@ static int rk808_clkout_probe(struct platform_device *pdev) struct rk808_clkout *rk808_clkout; int ret; - dev->of_node = pdev->dev.parent->of_node; + device_set_of_node_from_dev(dev, dev->parent); rk808_clkout = devm_kzalloc(dev, sizeof(*rk808_clkout), GFP_KERNEL); From aa4d6fee0449c376fc3b73f1fb223106c126bd57 Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 22:50:51 +0900 Subject: [PATCH 393/554] hwmon: (corsair-psu) Close HID device on probe errors commit 174606451fbb17db506ebaacdd5e203e57773d5f upstream. corsairpsu_probe() opens the HID device before sending the device init and firmware-info commands. If either command fails, the error path jumps directly to fail_and_stop and skips hid_hw_close(). Use the existing fail_and_close label for those post-open failures so the open count and low-level close callback are balanced before hid_hw_stop(). 
Fixes: d115b51e0e56 ("hwmon: add Corsair PSU HID controller driver") Cc: stable@vger.kernel.org Signed-off-by: Myeonghun Pak Reviewed-by: Wilken Gottwalt Link: https://lore.kernel.org/r/20260424135107.13720-1-mhun512@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/corsair-psu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 6b5c8f2007803..ea28c9219507a 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -805,13 +805,13 @@ static int corsairpsu_probe(struct hid_device *hdev, const struct hid_device_id ret = corsairpsu_init(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to initialize device (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } ret = corsairpsu_fwinfo(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to query firmware (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } corsairpsu_get_criticals(priv); From c34c41446acf6c0d13b5b06c809be11e0f7f2729 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Wed, 6 May 2026 22:08:23 +0800 Subject: [PATCH 394/554] af_unix: Reject SIOCATMARK on non-stream sockets commit d119775f2bad827edc28071c061fdd4a91f889a5 upstream. SIOCATMARK reports whether the receive queue is at the urgent mark for MSG_OOB. In AF_UNIX, MSG_OOB is supported only for SOCK_STREAM sockets. SOCK_DGRAM and SOCK_SEQPACKET reject MSG_OOB in sendmsg() and recvmsg(), so they should not support SIOCATMARK either. Return -EOPNOTSUPP for non-stream sockets before checking the receive queue. 
Fixes: 314001f0bf92 ("af_unix: Add OOB support") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Suggested-by: Kuniyuki Iwashima Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260506140825.2987635-1-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6b251c76f6bec..a09c732748945 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3339,6 +3339,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sk_buff *skb; int answ = 0; + if (sk->sk_type != SOCK_STREAM) + return -EOPNOTSUPP; + mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue); From 8aacf87718b90af440e323e0f5f5d14aeb343090 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 5 May 2026 09:02:13 -0700 Subject: [PATCH 395/554] arm64/fpsimd: ptrace: zero target's fpsimd_state, not the tracer's commit 5cbb61bf4168859d97c068d88d364f4f1f440325 upstream. sve_set_common() is the backend for PTRACE_SETREGSET(NT_ARM_SVE) and PTRACE_SETREGSET(NT_ARM_SSVE). Every write in the function operates on the tracee (target) - except a single memset that uses current instead, zeroing the tracer's saved V0-V31 / FPSR / FPCR shadow on every ptrace SETREGSET call. The memset is meant to give the tracee a defined zero register image before the user-supplied payload is copied in (for partial writes, header-only writes, and FPSIMD<->SVE format switches). Aiming it at current both denies the tracee that clean slate and silently corrupts the tracer. The corruption of the tracer's saved FPSIMD state is not always observable. Where the tracer's state is live on a CPU, this may be reused without loading the corrupted state from memory, and will eventually be written back over the corrupted state. 
Where the tracer's state is saved in SVE_PT_REGS_SVE format, only the FPSR and FPCR are clobbered, and the effective copy of the vectors is in the task's sve_state. Reproducible on an arm64 kernel with SVE: a single-threaded tracer that loads a known pattern into V0-V31, issues PTRACE_SETREGSET(NT_ARM_SVE) on a child, and reads V0-V31 back observes them all zeroed within tens of thousands of iterations when a sibling thread keeps stealing the FPSIMD CPU binding. Fixes: 316283f276eb ("arm64/fpsimd: ptrace: Consistently handle partial writes to NT_ARM_(S)SVE") Cc: Signed-off-by: Breno Leitao Acked-by: Mark Rutland Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2e9ce5a45ed2d..2acc2c3dd033e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -957,8 +957,8 @@ static int sve_set_common(struct task_struct *target, } /* Always zero V regs, FPSR, and FPCR */ - memset(¤t->thread.uw.fpsimd_state, 0, - sizeof(current->thread.uw.fpsimd_state)); + memset(&target->thread.uw.fpsimd_state, 0, + sizeof(target->thread.uw.fpsimd_state)); /* Registers: FPSIMD-only case */ From cb27e43c0511e9e1ca8818d231656070b11c18cf Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 8 Apr 2026 14:11:21 +0000 Subject: [PATCH 396/554] pmdomain: mediatek: fix use-after-free in scpsys_get_bus_protection_legacy() commit ec1fcddb3117d9452210e838fd37389ee61e10e8 upstream. In scpsys_get_bus_protection_legacy(), of_find_node_with_property() returns a device node with its reference count incremented. The function then calls of_node_put(node) before checking whether syscon_regmap_lookup_by_phandle() returns an error. 
If an error occurs, dev_err_probe() dereferences the node pointer to print diagnostic information, but the node memory may have already been freed due to the earlier of_node_put(), leading to a use-after-free vulnerability. Fix this by moving the of_node_put() call after the error check, ensuring the node is still valid when accessed in the error path. Fixes: c29345fa5f66 ("pmdomain: mediatek: Refactor bus protection regmaps retrieval") Cc: stable@vger.kernel.org Signed-off-by: Wentao Liang Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/pmdomain/mediatek/mtk-pm-domains.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index 269634bcd9a40..1716d726b8cca 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c @@ -757,6 +757,7 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s struct device_node *node, *smi_np; int num_regmaps = 0, i, j; struct regmap *regmap[3]; + int ret = 0; /* * Legacy code retrieves a maximum of three bus protection handles: @@ -807,11 +808,14 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s if (node) { regmap[2] = syscon_regmap_lookup_by_phandle(node, "mediatek,infracfg-nao"); num_regmaps++; - of_node_put(node); - if (IS_ERR(regmap[2])) - return dev_err_probe(dev, PTR_ERR(regmap[2]), + if (IS_ERR(regmap[2])) { + ret = dev_err_probe(dev, PTR_ERR(regmap[2]), "%pOF: failed to get infracfg regmap\n", node); + of_node_put(node); + return ret; + } + of_node_put(node); } else { regmap[2] = NULL; } From f17d521075325b8afc42d1baa1c28a5e9aca111f Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 10 Apr 2026 15:34:13 +0000 Subject: [PATCH 397/554] block: add pgmap check to biovec_phys_mergeable commit 13920e4b7b784b40cf4519ff1f0f3e513476a499 upstream. 
biovec_phys_mergeable() is used by the request merge, DMA mapping, and integrity merge paths to decide if two physically contiguous bvec segments can be coalesced into one. It currently has no check for whether the segments belong to different dev_pagemaps. When zone device memory is registered in multiple chunks, each chunk gets its own dev_pagemap. A single bio can legitimately contain bvecs from different pgmaps -- iov_iter_extract_bvecs() breaks at pgmap boundaries but the outer loop in bio_iov_iter_get_pages() continues filling the same bio. If such bvecs are physically contiguous, biovec_phys_mergeable() will coalesce them, making it impossible to recover the correct pgmap for the merged segment via page_pgmap(). Add a zone_device_pages_have_same_pgmap() check to prevent merging bvec segments that span different pgmaps. Fixes: 49580e690755 ("block: add check when merging zone device pages") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Naman Jain Link: https://patch.msgid.link/20260410153414.4159050-2-namjain@linux.microsoft.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk.h b/block/blk.h index 80a6a942a10af..59bd91a4acc4d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -132,6 +132,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, if (addr1 + vec1->bv_len != addr2) return false; + if (!zone_device_pages_have_same_pgmap(vec1->bv_page, vec2->bv_page)) + return false; if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) From 281a0014f46f22a1e964b0096a4e698c27888c95 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 May 2026 08:34:32 -0600 Subject: [PATCH 398/554] block: only read from sqe on initial invocation of blkdev_uring_cmd() commit 212ec34e4e726e8cd4af7bea4740db24de8a9dab upstream. 
This passthrough helper currently only supports discards. Part of that command is the start and length, which is read from the SQE. It does so on every invocation, where it really should just make it stable on the first invocation. This avoids needing to copy the SQE upfront, as we only really need those two 8b values stored in our per-req payload. Cc: stable@vger.kernel.org # 6.17+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/ioctl.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index d7489a56b33c3..08e4981506cbc 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -765,6 +765,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) #endif struct blk_iou_cmd { + u64 start; + u64 len; int res; bool nowait; }; @@ -852,23 +854,27 @@ int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); - const struct io_uring_sqe *sqe = cmd->sqe; u32 cmd_op = cmd->cmd_op; - uint64_t start, len; - if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || - sqe->rw_flags || sqe->file_index)) - return -EINVAL; + /* Read what we need from the SQE on the first issue */ + if (!(issue_flags & IORING_URING_CMD_REISSUE)) { + const struct io_uring_sqe *sqe = cmd->sqe; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->start = READ_ONCE(sqe->addr); + bic->len = READ_ONCE(sqe->addr3); + } bic->res = 0; bic->nowait = issue_flags & IO_URING_F_NONBLOCK; - start = READ_ONCE(sqe->addr); - len = READ_ONCE(sqe->addr3); - switch (cmd_op) { case BLOCK_URING_CMD_DISCARD: - return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + return blkdev_cmd_discard(cmd, bdev, bic->start, bic->len, + bic->nowait); } return -EINVAL; } From 
064f2e49199c28095c3739c3decc9e90f05f1d73 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 28 Apr 2026 21:37:47 +0530 Subject: [PATCH 399/554] cifs: abort open_cached_dir if we don't request leases commit d68ce834f8cf6cb2e77f3331df65166b35466b53 upstream. It is possible that SMB2_open_init may not set lease context based on the requested oplock level. This can happen when leases have been temporarily or permanently disabled. When this happens, we will have open_cached_dir making an open without lease context and the response will anyway be rejected by open_cached_dir (thereby forcing a close to discard this open). That's unnecessary two round-trips to the server. This change adds a check before making the open request to the server to make sure that SMB2_open_init did add the expected lease context to the open in open_cached_dir. Cc: Reviewed-by: Bharath SM Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 09939fe9666ea..de687b491df46 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -286,6 +286,14 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, &rqst[0], &oplock, &oparms, utf16_path); if (rc) goto oshr_free; + + if (oplock != SMB2_OPLOCK_LEVEL_II) { + rc = -EINVAL; + cifs_dbg(FYI, "%s: Oplock level %d not suitable for cached directory\n", + __func__, oplock); + goto oshr_free; + } + smb2_set_next_command(tcon, &rqst[0]); memset(&qi_iov, 0, sizeof(qi_iov)); From 196efa8b254b51dd269a404a2b1e7e27b762fd46 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 30 Mar 2026 16:19:59 +0530 Subject: [PATCH 400/554] cifs: change_conf needs to be called for session setup commit c208a2b95811d6e1ebae65d0d2fc13f73707f8e7 upstream. Today we skip calling change_conf for negotiates and session setup requests. 
This can be a problem for mchan as the immediate next call after session setup could be due to an I/O that is made on the mount point. For single channel, this is not a problem as there will be several calls after setting up session. This change enforces calling change_conf when the total credits contain enough for reservations for echoes and oplocks. We expect this to happen during the last session setup response. This way, echoes and oplocks are not disabled before the first request to the server. So if that first request is an open, it does not need to disable requesting leases. Cc: Reviewed-by: Bharath SM Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0497c97195644..eed3a71171c0b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -111,10 +111,21 @@ smb2_add_credits(struct TCP_Server_Info *server, cifs_trace_rw_credits_zero_in_flight); } server->in_flight--; + + /* + * Rebalance credits when an op drains in_flight. For session setup, + * do this only when the total accumulated credits are high enough (>2) + * so that a newly established secondary channel can reserve credits for + * echoes and oplocks. We expect this to happen at the end of the final + * session setup response. + */ if (server->in_flight == 0 && ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) && ((optype & CIFS_OP_MASK) != CIFS_SESS_OP)) rc = change_conf(server); + else if (server->in_flight == 0 && + ((optype & CIFS_OP_MASK) == CIFS_SESS_OP) && *val > 2) + rc = change_conf(server); /* * Sometimes server returns 0 credits on oplock break ack - we need to * rebalance credits in this case. 
From bbf63275d4a5ee28c872bd730a1d788e59c23a13 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Sat, 15 Nov 2025 10:59:05 +0800 Subject: [PATCH 401/554] extcon: ptn5150: handle pending IRQ events during system resume commit 4652fefcda3c604c83d1ae28ede94544e2142f06 upstream. When the system is suspended and ptn5150 wakeup interrupt is disabled, any changes on ptn5150 will only be record in interrupt status registers and won't fire an IRQ since its trigger type is falling edge. So the HW interrupt line will keep at low state and any further changes won't trigger IRQ anymore. To fix it, this will schedule a work to check whether any IRQ are pending and handle it accordingly. Fixes: 4ed754de2d66 ("extcon: Add support for ptn5150 extcon driver") Cc: stable@vger.kernel.org Reviewed-by: Krzysztof Kozlowski Acked-by: MyungJoo Ham Signed-off-by: Xu Yang Signed-off-by: Chanwoo Choi Link: https://lore.kernel.org/lkml/20251115025905.1395347-1-xu.yang_2@nxp.com/ Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon-ptn5150.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/extcon/extcon-ptn5150.c b/drivers/extcon/extcon-ptn5150.c index 78ad86c4a3bee..31970fb34fcb2 100644 --- a/drivers/extcon/extcon-ptn5150.c +++ b/drivers/extcon/extcon-ptn5150.c @@ -331,6 +331,19 @@ static int ptn5150_i2c_probe(struct i2c_client *i2c) return 0; } +static int ptn5150_resume(struct device *dev) +{ + struct i2c_client *i2c = to_i2c_client(dev); + struct ptn5150_info *info = i2c_get_clientdata(i2c); + + /* Need to check possible pending interrupt events */ + schedule_work(&info->irq_work); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(ptn5150_pm_ops, NULL, ptn5150_resume); + static const struct of_device_id ptn5150_dt_match[] = { { .compatible = "nxp,ptn5150" }, { }, @@ -346,6 +359,7 @@ MODULE_DEVICE_TABLE(i2c, ptn5150_i2c_id); static struct i2c_driver ptn5150_i2c_driver = { .driver = { .name = "ptn5150", + .pm = pm_sleep_ptr(&ptn5150_pm_ops), .of_match_table = 
ptn5150_dt_match, }, .probe = ptn5150_i2c_probe, From 5b958822126dbe7706a6551cb4900900f724844c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Mar 2026 13:42:37 +0100 Subject: [PATCH 402/554] gpio: of: clear OF_POPULATED on hog nodes in remove path commit bbee90e750262bfb406d66dc65c46d616d2b6673 upstream. The previously set OF_POPULATED flag should be cleared on the hog nodes when removing the chip. Cc: stable@vger.kernel.org Fixes: 63636d956c455 ("gpio: of: Add DT overlay support for GPIO hogs") Acked-by: Linus Walleij Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20260309-gpio-hog-fwnode-v2-1-4e61f3dbf06a@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-of.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index fad4edf9cc5c0..7bb33c46788c6 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -1285,7 +1285,14 @@ int of_gpiochip_add(struct gpio_chip *chip) void of_gpiochip_remove(struct gpio_chip *chip) { - of_node_put(dev_of_node(&chip->gpiodev->dev)); + struct device_node *np = dev_of_node(&chip->gpiodev->dev); + + for_each_child_of_node_scoped(np, child) { + if (of_property_present(child, "gpio-hog")) + of_node_clear_flag(child, OF_POPULATED); + } + + of_node_put(np); } bool of_gpiochip_instance_match(struct gpio_chip *gc, unsigned int index) From cb028f72fb3b2d4af81c74883e904df58ad01db6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 2 Apr 2026 11:09:15 +0200 Subject: [PATCH 403/554] hv: Select CONFIG_SYSFB only for CONFIG_HYPERV_VMBUS commit d33db956c9618e7cb08c2520ce708437914214ec upstream. Hyperv's sysfb access only exists in the VMBUS support. Therefore only select CONFIG_SYSFB for CONFIG_HYPERV_VMBUS. Avoids sysfb code on systems that don't need it. 
Signed-off-by: Thomas Zimmermann Fixes: 96959283a58d ("Drivers: hv: Always select CONFIG_SYSFB for Hyper-V guests") Cc: Michael Kelley Cc: Saurabh Sengar Cc: Wei Liu Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Long Li Cc: linux-hyperv@vger.kernel.org Cc: # v6.16+ Reviewed-by: Saurabh Sengar Link: https://patch.msgid.link/20260402092305.208728-2-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/hv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 0b8c391a03424..846453002e3c5 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -9,7 +9,6 @@ config HYPERV select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF - select SYSFB if EFI && !HYPERV_VTL_MODE select IRQ_MSI_LIB if X86 help Select this option to run Linux as a Hyper-V client operating @@ -61,6 +60,7 @@ config HYPERV_VMBUS tristate "Microsoft Hyper-V VMBus driver" depends on HYPERV default HYPERV + select SYSFB if EFI && !HYPERV_VTL_MODE help Select this option to enable Hyper-V Vmbus driver. From d1b9424f7393e3353128f6ec43613ea5ff0444f8 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 28 Apr 2026 08:53:39 -0400 Subject: [PATCH 404/554] hv_sock: fix ARM64 support commit b31681206e3f527970a7c7ed807fbf6a028fc25b upstream. VMBUS ring buffers must be page aligned. Therefore, the current value of 24K presents a challenge on ARM64 kernels (with 64K pages). So, use VMBUS_RING_SIZE() to ensure they are always aligned and large enough to hold all of the relevant data. 
Cc: stable@vger.kernel.org Fixes: 77ffe33363c0 ("hv_sock: use HV_HYP_PAGE_SIZE for Hyper-V communication") Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Signed-off-by: Hamza Mahfooz Acked-by: Stefano Garzarella Link: https://patch.msgid.link/20260428125339.13963-1-hamzamahfooz@linux.microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/hyperv_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 432fcbbd14d4f..9393d3d85590b 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -375,10 +375,10 @@ static void hvs_open_connection(struct vmbus_channel *chan) } else { sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); - sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE); + sndbuf = VMBUS_RING_SIZE(sndbuf); rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); - rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); + rcvbuf = VMBUS_RING_SIZE(rcvbuf); } chan->max_pkt_size = HVS_MAX_PKT_SIZE; From 317e434e26af461a781dd9bf0be88bb4bbae7560 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 16 Apr 2026 12:14:33 -0700 Subject: [PATCH 405/554] hv_sock: Report EOF instead of -EIO for FIN commit f6315295899415f1ddcf39f7c9cb46d25e2c6c6a upstream. Commit f0c5827d07cb unluckily causes a regression for the FIN packet, and the final read syscall gets an error rather than 0. Ideally, we would want to fix hvs_channel_readable_payload() so that it could return 0 in the FIN scenario, but it's not good for the hv_sock driver to use the VMBus ringbuffer's cached priv_read_index, which is internal data in the VMBus driver. Fix the regression in hv_sock by returning 0 rather than -EIO. 
Fixes: f0c5827d07cb ("hv_sock: Return the readable bytes in hvs_stream_has_data()") Cc: stable@vger.kernel.org Reported-by: Ben Hillis Reported-by: Mitchell Levy Signed-off-by: Dexuan Cui Acked-by: Stefano Garzarella Link: https://patch.msgid.link/20260416191433.840637-1-decui@microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/hyperv_transport.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 9393d3d85590b..56056254801c0 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -694,7 +694,6 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg, static s64 hvs_stream_has_data(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; - bool need_refill; s64 ret; if (hvs->recv_data_len > 0) @@ -702,9 +701,22 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) switch (hvs_channel_readable_payload(hvs->chan)) { case 1: - need_refill = !hvs->recv_desc; - if (!need_refill) - return -EIO; + if (hvs->recv_desc) { + /* Here hvs->recv_data_len is 0, so hvs->recv_desc must + * be NULL unless it points to the 0-byte-payload FIN + * packet: see hvs_update_recv_data(). + * + * Here all the payload has been dequeued, but + * hvs_channel_readable_payload() still returns 1, + * because the VMBus ringbuffer's read_index is not + * updated for the FIN packet: hvs_stream_dequeue() -> + * hv_pkt_iter_next() updates the cached priv_read_index + * but has no opportunity to update the read_index in + * hv_pkt_iter_close() as hvs_stream_has_data() returns + * 0 for the FIN packet, so it won't get dequeued. 
+ */ + return 0; + } hvs->recv_desc = hv_pkt_iter_first(hvs->chan); if (!hvs->recv_desc) From 4f82f046f913638ad732df473099f0aa025f9e23 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 22 Apr 2026 23:48:11 -0700 Subject: [PATCH 406/554] hv_sock: Return -EIO for malformed/short packets commit 3d1f20727a635811f6b77801a7b57b8995268abd upstream. Commit f63152958994 fixes a regression, however it fails to report an error for malformed/short packets -- normally we should never see such packets, but let's report an error for them just in case. Fixes: f63152958994 ("hv_sock: Report EOF instead of -EIO for FIN") Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Acked-by: Stefano Garzarella Link: https://patch.msgid.link/20260423064811.1371749-1-decui@microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/hyperv_transport.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 56056254801c0..f9dc9b4d30238 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -704,17 +704,26 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) if (hvs->recv_desc) { /* Here hvs->recv_data_len is 0, so hvs->recv_desc must * be NULL unless it points to the 0-byte-payload FIN - * packet: see hvs_update_recv_data(). + * packet or a malformed/short packet: see + * hvs_update_recv_data(). * - * Here all the payload has been dequeued, but - * hvs_channel_readable_payload() still returns 1, - * because the VMBus ringbuffer's read_index is not - * updated for the FIN packet: hvs_stream_dequeue() -> - * hv_pkt_iter_next() updates the cached priv_read_index - * but has no opportunity to update the read_index in - * hv_pkt_iter_close() as hvs_stream_has_data() returns - * 0 for the FIN packet, so it won't get dequeued. 
+ * If hvs->recv_desc points to the FIN packet, here all + * the payload has been dequeued and the peer_shutdown + * flag is set, but hvs_channel_readable_payload() still + * returns 1, because the VMBus ringbuffer's read_index + * is not updated for the FIN packet: + * hvs_stream_dequeue() -> hv_pkt_iter_next() updates + * the cached priv_read_index but has no opportunity to + * update the read_index in hv_pkt_iter_close() as + * hvs_stream_has_data() returns 0 for the FIN packet, + * so it won't get dequeued. + * + * In case hvs->recv_desc points to a malformed/short + * packet, return -EIO. */ + if (!(vsk->peer_shutdown & SEND_SHUTDOWN)) + return -EIO; + return 0; } From c1f261863e65b508f37416dfbc5c5d911c9b9233 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Fri, 24 Apr 2026 09:29:17 -0700 Subject: [PATCH 407/554] ibmveth: Disable GSO for packets with small MSS commit cc427d24ac6442ffdeafd157a63c7c5b73ed4de4 upstream. Some physical adapters on Power systems do not support segmentation offload when the MSS is less than 224 bytes. Attempting to send such packets causes the adapter to freeze, stopping all traffic until manually reset. Implement ndo_features_check to disable GSO for packets with small MSS values. The network stack will perform software segmentation instead. The 224-byte minimum matches ibmvnic commit ("ibmvnic: Enforce stronger sanity checks on GSO packets") which uses the same physical adapters in SEA configurations. The issue occurs specifically when the hardware attempts to perform segmentation (gso_segs > 1) with a small MSS. Single-segment GSO packets (gso_segs == 1) do not trigger the problematic LSO code path and are transmitted normally without segmentation. Add an ndo_features_check callback to disable GSO when MSS < 224 bytes. Also call vlan_features_check() to ensure proper handling of VLAN packets, particularly QinQ (802.1ad) configurations where the hardware parser may not support certain offload features. 
Validated using iptables to force small MSS values. Without the fix, the adapter freezes. With the fix, packets are segmented in software and transmission succeeds. Comprehensive regression testing completed (MSS tests, performance, stability). Fixes: 8641dd85799f ("ibmveth: Add support for TSO") Cc: stable@vger.kernel.org Reviewed-by: Brian King Tested-by: Shaik Abdulla Tested-by: Naveed Ahmed Signed-off-by: Mingming Cao Link: https://patch.msgid.link/20260424162917.65725-1-mmc@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 22 ++++++++++++++++++++++ drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 6f0821f1e798a..6e9552ce4c8c6 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1756,6 +1756,27 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p) return 0; } +static netdev_features_t ibmveth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* Some physical adapters do not support segmentation offload with + * MSS < 224. Disable GSO for such packets to avoid adapter freeze. + * Note: Single-segment packets (gso_segs == 1) don't need this check + * as they bypass the LSO path and are transmitted without segmentation. 
+ */ + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_size < IBMVETH_MIN_LSO_MSS) { + netdev_warn_once(dev, + "MSS %u too small for LSO, disabling GSO\n", + skb_shinfo(skb)->gso_size); + features &= ~NETIF_F_GSO_MASK; + } + } + + return vlan_features_check(skb, features); +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, @@ -1767,6 +1788,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ibmveth_set_mac_addr, + .ndo_features_check = ibmveth_features_check, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 068f99df133ec..d87713668ed30 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -37,6 +37,7 @@ #define IBMVETH_ILLAN_IPV4_TCP_CSUM 0x0000000000000002UL #define IBMVETH_ILLAN_ACTIVE_TRUNK 0x0000000000000001UL +#define IBMVETH_MIN_LSO_MSS 224 /* Minimum MSS for LSO */ /* hcall macros */ #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) From 121d1f253aed515cd85748f68c664a6cb756e8ad Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Thu, 16 Apr 2026 17:53:27 -0700 Subject: [PATCH 408/554] ice: fix double free in ice_sf_eth_activate() error path commit 9aab1c3d7299285e2569cbc0ed5892d631a241b2 upstream. When auxiliary_device_add() fails, ice_sf_eth_activate() jumps to aux_dev_uninit and calls auxiliary_device_uninit(&sf_dev->adev). The device release callback ice_sf_dev_release() frees sf_dev, but the current error path falls through to sf_dev_free and calls kfree(sf_dev) again, causing a double free. 
Keep kfree(sf_dev) for the auxiliary_device_init() failure path, but avoid falling through to sf_dev_free after auxiliary_device_uninit(). Fixes: 13acc5c4cdbe ("ice: subfunction activation and base devlink ops") Cc: stable@vger.kernel.org Reviewed-by: Aleksandr Loktionov Signed-off-by: Guangshuo Li Reviewed-by: Simon Horman Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260416-iwl-net-submission-2026-04-14-v2-3-686c33c9828d@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ice/ice_sf_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 1a2c94375ca71..f7266d0368153 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -305,6 +305,8 @@ ice_sf_eth_activate(struct ice_dynamic_port *dyn_port, aux_dev_uninit: auxiliary_device_uninit(&sf_dev->adev); + return err; + sf_dev_free: kfree(sf_dev); xa_erase: From d945d71bbad58d727aa2d37e7d3503d08e2931b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 Apr 2026 14:04:17 +0200 Subject: [PATCH 409/554] spi: microchip-core-qspi: fix controller deregistration commit e6464140d439f2d42f072eb422a5b1fec470c5a6 upstream. Make sure to deregister the controller before disabling underlying resources like interrupts during driver unbind. 
Fixes: 8596124c4c1b ("spi: microchip-core-qspi: Add support for microchip fpga qspi controllers") Cc: stable@vger.kernel.org # 6.1 Cc: Naga Sureshkumar Relli Signed-off-by: Johan Hovold Acked-by: Conor Dooley Link: https://patch.msgid.link/20260409120419.388546-19-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-microchip-core-qspi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index aafe6cbf2aea7..eab059fb0bc2c 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -692,7 +692,7 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) return -ENOMEM; qspi = spi_controller_get_devdata(ctlr); - platform_set_drvdata(pdev, qspi); + platform_set_drvdata(pdev, ctlr); qspi->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(qspi->regs)) @@ -732,7 +732,7 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) ctlr->num_chipselect = 2; ctlr->use_gpio_descriptors = true; - ret = devm_spi_register_controller(&pdev->dev, ctlr); + ret = spi_register_controller(ctlr); if (ret) return dev_err_probe(&pdev->dev, ret, "spi_register_controller failed\n"); @@ -742,9 +742,13 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) static void mchp_coreqspi_remove(struct platform_device *pdev) { - struct mchp_coreqspi *qspi = platform_get_drvdata(pdev); - u32 control = readl_relaxed(qspi->regs + REG_CONTROL); + struct spi_controller *ctlr = platform_get_drvdata(pdev); + struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); + u32 control; + spi_unregister_controller(ctlr); + + control = readl_relaxed(qspi->regs + REG_CONTROL); mchp_coreqspi_disable_ints(qspi); control &= ~CONTROL_ENABLE; writel_relaxed(control, qspi->regs + REG_CONTROL); From ec9d0ddbde6003c303fa5e1d5cd48952852984d8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 30 Apr 2026 
11:10:19 +0100 Subject: [PATCH 410/554] spi: microchip-core-qspi: don't attempt to transmit during emulated read-only dual/quad operations commit eb56deaabf127e8985fc91fa6c97bf8a3b062844 upstream. The core will deal with reads by creating clock cycles itself, there's no need to generate clock cycles by transmitting garbage data at the driver level. Further, transmitting garbage data just bricks the transfer since QSPI doesn't have a dedicated master-out line like MOSI in regular SPI. I'm not entirely sure if the transfer is bricked because of the garbage data being transmitted on the bus or because the core loses track of whether it is supposed to be sending or receiving data. Fixes: 8f9cf02c88528 ("spi: microchip-core-qspi: Add regular transfers") CC: stable@vger.kernel.org Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260430-freezing-saloon-95b1f3d9dad0@spud Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-microchip-core-qspi.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index eab059fb0bc2c..6b7d350dd53d8 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -662,18 +662,28 @@ static int mchp_coreqspi_transfer_one(struct spi_controller *ctlr, struct spi_de struct spi_transfer *t) { struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); + bool dual_quad = false; qspi->tx_len = t->len; + if (t->tx_nbits == SPI_NBITS_QUAD || t->rx_nbits == SPI_NBITS_QUAD || + t->tx_nbits == SPI_NBITS_DUAL || + t->rx_nbits == SPI_NBITS_DUAL) + dual_quad = true; + if (t->tx_buf) qspi->txbuf = (u8 *)t->tx_buf; if (!t->rx_buf) { mchp_coreqspi_write_op(qspi); - } else { + } else if (!dual_quad) { qspi->rxbuf = (u8 *)t->rx_buf; qspi->rx_len = t->len; mchp_coreqspi_write_read_op(qspi); + } else { + qspi->rxbuf = (u8 *)t->rx_buf; + qspi->rx_len = t->len; + 
mchp_coreqspi_read_op(qspi); } return 0; From 998f43196d732f20f9b71eb6ebd973736c9fa911 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 30 Apr 2026 11:10:18 +0100 Subject: [PATCH 411/554] spi: microchip-core-qspi: control built-in cs manually commit 7672749e1496215e8683ce57cf323119033954cf upstream. The coreQSPI IP supports only a single chip select, which is automagically operated by the hardware - set low when the transmit buffer first gets written to and set high when the number of bytes written to the TOTALBYTES field of the FRAMES register have been sent on the bus. Additional devices must use GPIOs for their chip selects. It was reported to me that if there are two devices attached to this QSPI controller that the in-built chip select is set low while linux tries to access the device attached to the GPIO. This went undetected as the boards that connected multiple devices to the SPI controller all exclusively used GPIOs for chip selects, not relying on the built-in chip select at all. It turns out that this was because the built-in chip select, when controlled automagically, is set low when active and high when inactive, thereby ruling out its use for active-high devices or devices that need to transmit with the chip select disabled. Modify the driver so that it controls chip select directly, retaining the behaviour for mem_ops of setting the chip select active for the entire duration of the transfer in the exec_op callback. For regular transfers, implement the set_cs callback for the core to use. As part of this, the existing setup callback, mchp_coreqspi_setup_op(), is removed. Modifying the CLKIDLE field is not safe to do during operation when there are multiple devices, so this code is removed entirely. Setting the MASTER and ENABLE fields is something that can be done once at probe, it doesn't need to be re-run for each device. 
Instead the new setup callback sets the built-in chip select to its inactive state for active-low devices, as the reset value of the chip select in software controlled mode is low. Fixes: 8f9cf02c88528 ("spi: microchip-core-qspi: Add regular transfers") Fixes: 8596124c4c1bc ("spi: microchip-core-qspi: Add support for microchip fpga qspi controllers") CC: stable@vger.kernel.org Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260430-hamstring-busload-f941d0347b5e@spud Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-microchip-core-qspi.c | 79 ++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index 6b7d350dd53d8..70215a407b5a3 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -74,6 +74,13 @@ #define STATUS_FLAGSX4 BIT(8) #define STATUS_MASK GENMASK(8, 0) +/* + * QSPI Direct Access register defines + */ +#define DIRECT_ACCESS_EN_SSEL BIT(0) +#define DIRECT_ACCESS_OP_SSEL BIT(1) +#define DIRECT_ACCESS_OP_SSEL_SHIFT 1 + #define BYTESUPPER_MASK GENMASK(31, 16) #define BYTESLOWER_MASK GENMASK(15, 0) @@ -158,6 +165,38 @@ static int mchp_coreqspi_set_mode(struct mchp_coreqspi *qspi, const struct spi_m return 0; } +static void mchp_coreqspi_set_cs(struct spi_device *spi, bool enable) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + + val &= ~DIRECT_ACCESS_OP_SSEL; + val |= !enable << DIRECT_ACCESS_OP_SSEL_SHIFT; + + writel(val, qspi->regs + REG_DIRECT_ACCESS); +} + +static int mchp_coreqspi_setup(struct spi_device *spi) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + /* + * Active low devices need to be specifically set to their inactive + * states during probe. 
+ */ + if (spi->mode & SPI_CS_HIGH) + return 0; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_OP_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + + return 0; +} + static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -380,19 +419,6 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi return 0; } -static int mchp_coreqspi_setup_op(struct spi_device *spi_dev) -{ - struct spi_controller *ctlr = spi_dev->controller; - struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); - u32 control = readl_relaxed(qspi->regs + REG_CONTROL); - - control |= (CONTROL_MASTER | CONTROL_ENABLE); - control &= ~CONTROL_CLKIDLE; - writel_relaxed(control, qspi->regs + REG_CONTROL); - - return 0; -} - static inline void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) { u32 idle_cycles = 0; @@ -483,6 +509,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o reinit_completion(&qspi->data_completion); mchp_coreqspi_config_op(qspi, op); + mchp_coreqspi_set_cs(mem->spi, true); if (op->cmd.opcode) { qspi->txbuf = &opcode; qspi->rxbuf = NULL; @@ -523,6 +550,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o err = -ETIMEDOUT; error: + mchp_coreqspi_set_cs(mem->spi, false); mutex_unlock(&qspi->op_lock); mchp_coreqspi_disable_ints(qspi); @@ -696,6 +724,7 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; int ret; + u32 num_cs, val; ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*qspi)); if (!ctlr) @@ -728,10 +757,18 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) return ret; } + /* + * The IP core only has a single CS, any more have to be provided via + * gpios + */ + if (of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs)) + num_cs = 1; + + ctlr->num_chipselect = num_cs; + 
ctlr->bits_per_word_mask = SPI_BPW_MASK(8); ctlr->mem_ops = &mchp_coreqspi_mem_ops; ctlr->mem_caps = &mchp_coreqspi_mem_caps; - ctlr->setup = mchp_coreqspi_setup_op; ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD; ctlr->dev.of_node = np; @@ -739,9 +776,21 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) ctlr->prepare_message = mchp_coreqspi_prepare_message; ctlr->unprepare_message = mchp_coreqspi_unprepare_message; ctlr->transfer_one = mchp_coreqspi_transfer_one; - ctlr->num_chipselect = 2; + ctlr->setup = mchp_coreqspi_setup; + ctlr->set_cs = mchp_coreqspi_set_cs; ctlr->use_gpio_descriptors = true; + val = readl_relaxed(qspi->regs + REG_CONTROL); + val |= (CONTROL_MASTER | CONTROL_ENABLE); + writel_relaxed(val, qspi->regs + REG_CONTROL); + + /* + * Put cs into software controlled mode + */ + val = readl_relaxed(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_EN_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + ret = spi_register_controller(ctlr); if (ret) return dev_err_probe(&pdev->dev, ret, From b1040af587713e0475f83f17b180d5a927da3346 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 4 Apr 2026 14:47:47 +0100 Subject: [PATCH 412/554] tracefs: Fix default permissions not being applied on initial mount commit e8368d1f4bedbb0cce4cfe33a1d2664bb0fd4f27 upstream. Commit e4d32142d1de ("tracing: Fix tracefs mount options") moved the option application from tracefs_fill_super() to tracefs_reconfigure() called from tracefs_get_tree(). This fixed mount options being ignored on user-space mounts when the superblock already exists, but introduced a regression for the initial kernel-internal mount. On the first mount (via simple_pin_fs during init), sget_fc() transfers fc->s_fs_info to sb->s_fs_info and sets fc->s_fs_info to NULL. When tracefs_get_tree() then calls tracefs_reconfigure(), it sees a NULL fc->s_fs_info and returns early without applying any options. 
The root inode keeps mode 0755 from simple_fill_super() instead of the intended TRACEFS_DEFAULT_MODE (0700). Furthermore, even on subsequent user-space mounts without an explicit mode= option, tracefs_apply_options(sb, true) gates the mode behind fsi->opts & BIT(Opt_mode), which is unset for the defaults. So the mode is never corrected unless the user explicitly passes mode=0700. Restore the tracefs_apply_options(sb, false) call in tracefs_fill_super() to apply default permissions on initial superblock creation, matching what debugfs does in debugfs_fill_super(). Cc: stable@vger.kernel.org Fixes: e4d32142d1de ("tracing: Fix tracefs mount options") Link: https://patch.msgid.link/20260404134747.98867-1-devnexen@gmail.com Signed-off-by: David Carlier Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- fs/tracefs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 0c023941a316f..35c0cc557fbb7 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -491,6 +491,7 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) return err; sb->s_op = &tracefs_super_operations; + tracefs_apply_options(sb, false); set_default_d_op(sb, &tracefs_dentry_operations); return 0; From 1873eb81c65d3f849418d7386baa39c439c9fc38 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 13 Apr 2026 17:12:40 -0400 Subject: [PATCH 413/554] udf: reject descriptors with oversized CRC length commit 55d41b0a20128e86b9e960dd2e3f0a2d69a18df7 upstream. udf_read_tagged() skips CRC verification when descCRCLength + sizeof(struct tag) exceeds the block size. A crafted UDF image can set descCRCLength to an oversized value to bypass CRC validation entirely; the descriptor is then accepted based solely on the 8-bit tag checksum, which is trivially recomputable. Reject such descriptors instead of silently accepting them. 
A legitimate single-block descriptor should never have a CRC length that exceeds the block. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260413211240.853662-1-michael.bommarito@gmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/misc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 0788593b6a1d8..6928e378fbbdc 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -230,8 +230,12 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, } /* Verify the descriptor CRC */ - if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize || - le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, + if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize) { + udf_err(sb, "block %u: CRC length %u exceeds block size\n", + block, le16_to_cpu(tag_p->descCRCLength)); + goto error_out; + } + if (le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, bh->b_data + sizeof(struct tag), le16_to_cpu(tag_p->descCRCLength))) return bh; From eb295d2b6e70bbd83f8e6fcad4631c858707a245 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Apr 2026 15:58:34 +0200 Subject: [PATCH 414/554] thermal: core: Free thermal zone ID later during removal commit daae9c18feec74566e023fc88cfb0ce26e39d868 upstream. The thermal zone removal ordering is different from the thermal zone registration rollback path ordering and the former is arguably problematic because freeing a thermal zone ID prematurely may cause it to be used during the registration of another thermal zone which may fail as a result. Prevent that from occurring by changing the thermal zone removal ordering to reflect the thermal zone registration rollback path ordering. 
Also move the ida_destroy() call from thermal_zone_device_unregister() to thermal_release() for consistency. Fixes: b31ef8285b19 ("thermal core: convert ID allocation to IDA") Cc: All applicable Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/5063934.GXAFRqVoOG@rafael.j.wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 61e6dc81dc923..1310e76c0fa64 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -965,6 +965,7 @@ static void thermal_release(struct device *dev) tz = to_thermal_zone(dev); thermal_zone_destroy_device_groups(tz); thermal_set_governor(tz, NULL); + ida_destroy(&tz->ida); mutex_destroy(&tz->lock); complete(&tz->removal); } else if (!strncmp(dev_name(dev), "cooling_device", @@ -1726,8 +1727,6 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) thermal_thresholds_exit(tz); thermal_remove_hwmon_sysfs(tz); - ida_free(&thermal_tz_ida, tz->id); - ida_destroy(&tz->ida); device_del(&tz->device); put_device(&tz->device); @@ -1735,6 +1734,9 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) thermal_notify_tz_delete(tz); wait_for_completion(&tz->removal); + + ida_free(&thermal_tz_ida, tz->id); + kfree(tz->tzp); kfree(tz); } From 3933dfb360e04edb01c8957a338df7eb809f356e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 7 Mar 2026 11:24:20 +0100 Subject: [PATCH 415/554] thermal/drivers/sprd: Fix temperature clamping in sprd_thm_temp_to_rawdata commit 83c0f9a5d679a6f8d84fc49b2f62ea434ccab4b6 upstream. The temperature was never clamped to SPRD_THM_TEMP_LOW or SPRD_THM_TEMP_HIGH because the return value of clamp() was not used. Fix this by assigning the clamped value to 'temp'. Casting SPRD_THM_TEMP_LOW and SPRD_THM_TEMP_HIGH to int is also redundant and can be removed. 
Fixes: 554fdbaf19b1 ("thermal: sprd: Add Spreadtrum thermal driver support") Signed-off-by: Thorsten Blum Signed-off-by: Daniel Lezcano Reviewed-by: Baolin Wang Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260307102422.306055-1-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/sprd_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/sprd_thermal.c b/drivers/thermal/sprd_thermal.c index e546067c96218..70c879e75d85c 100644 --- a/drivers/thermal/sprd_thermal.c +++ b/drivers/thermal/sprd_thermal.c @@ -192,7 +192,7 @@ static int sprd_thm_temp_to_rawdata(int temp, struct sprd_thermal_sensor *sen) { u32 val; - clamp(temp, (int)SPRD_THM_TEMP_LOW, (int)SPRD_THM_TEMP_HIGH); + temp = clamp(temp, SPRD_THM_TEMP_LOW, SPRD_THM_TEMP_HIGH); /* * According to the thermal datasheet, the formula of converting From 4b16c06da7636d2a3f62bb0667a0dc565e0e8c80 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 7 Mar 2026 11:24:21 +0100 Subject: [PATCH 416/554] thermal/drivers/sprd: Fix raw temperature clamping in sprd_thm_rawdata_to_temp commit b3414148bbc1f9cd56217e58a558c6ac4fd1b4a6 upstream. The raw temperature data was never clamped to SPRD_THM_RAW_DATA_LOW or SPRD_THM_RAW_DATA_HIGH because the return value of clamp() was not used. Fix this by assigning the clamped value to 'rawdata'. Casting SPRD_THM_RAW_DATA_LOW and SPRD_THM_RAW_DATA_HIGH to u32 is also redundant and can be removed. 
Fixes: 554fdbaf19b1 ("thermal: sprd: Add Spreadtrum thermal driver support") Signed-off-by: Thorsten Blum Signed-off-by: Daniel Lezcano Reviewed-by: Baolin Wang Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260307102422.306055-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/sprd_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/sprd_thermal.c b/drivers/thermal/sprd_thermal.c index 70c879e75d85c..44fa45f74da72 100644 --- a/drivers/thermal/sprd_thermal.c +++ b/drivers/thermal/sprd_thermal.c @@ -178,7 +178,7 @@ static int sprd_thm_sensor_calibration(struct device_node *np, static int sprd_thm_rawdata_to_temp(struct sprd_thermal_sensor *sen, u32 rawdata) { - clamp(rawdata, (u32)SPRD_THM_RAW_DATA_LOW, (u32)SPRD_THM_RAW_DATA_HIGH); + rawdata = clamp(rawdata, SPRD_THM_RAW_DATA_LOW, SPRD_THM_RAW_DATA_HIGH); /* * According to the thermal datasheet, the formula of converting From b3bdc7a8dd44f097afa705265da1a2206e9d5b5e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Apr 2026 15:43:18 +0200 Subject: [PATCH 417/554] spi: topcliff-pch: fix controller deregistration commit 5d6f477d6fc0767c57c5e1e6f55a1662820eef87 upstream. Make sure to deregister the controller before disabling and releasing underlying resources like interrupts and DMA during driver unbind. 
Fixes: e8b17b5b3f30 ("spi/topcliff: Add topcliff platform controller hub (PCH) spi bus driver") Cc: stable@vger.kernel.org # 2.6.37 Cc: Masayuki Ohtake Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260414134319.978196-8-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-topcliff-pch.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 60fce5c73031f..77527a4371839 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1406,6 +1406,10 @@ static void pch_spi_pd_remove(struct platform_device *plat_dev) dev_dbg(&plat_dev->dev, "%s:[ch%d] irq=%d\n", __func__, plat_dev->id, board_dat->pdev->irq); + spi_controller_get(data->host); + + spi_unregister_controller(data->host); + if (use_dma) pch_free_dma_buf(board_dat, data); @@ -1433,7 +1437,8 @@ static void pch_spi_pd_remove(struct platform_device *plat_dev) } pci_iounmap(board_dat->pdev, data->io_remap_addr); - spi_unregister_controller(data->host); + + spi_controller_put(data->host); } #ifdef CONFIG_PM static int pch_spi_pd_suspend(struct platform_device *pd_dev, From d50ef3553acbacce6f2843304d41d06dca358bb6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Apr 2026 15:43:19 +0200 Subject: [PATCH 418/554] spi: topcliff-pch: fix use-after-free on unbind commit 9d72732fe70c11424bc90ed466c7ccfa58b42a9a upstream. 
Give the driver a chance to flush its queue before releasing the DMA buffers on driver unbind. Fixes: c37f3c2749b5 ("spi/topcliff_pch: DMA support") Cc: stable@vger.kernel.org # 3.1 Cc: Tomoya MORINAGA Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260414134319.978196-9-johan@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-topcliff-pch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 77527a4371839..87de0145f35fe 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1410,9 +1410,6 @@ static void pch_spi_pd_remove(struct platform_device *plat_dev) spi_unregister_controller(data->host); - if (use_dma) - pch_free_dma_buf(board_dat, data); - /* check for any pending messages; no action is taken if the queue * is still full; but at least we tried. Unload anyway */ count = 500; @@ -1436,6 +1433,9 @@ static void pch_spi_pd_remove(struct platform_device *plat_dev) free_irq(board_dat->pdev->irq, data); } + if (use_dma) + pch_free_dma_buf(board_dat, data); + pci_iounmap(board_dat->pdev, data->io_remap_addr); spi_controller_put(data->host); From b528c7ffdc3e56e6146d90bce3fcfd7db145fb89 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Apr 2026 12:23:02 -0400 Subject: [PATCH 419/554] tracing/probes: Limit size of event probe to 3K commit b2aa3b4d64e460ac606f386c24e7d8a873ce6f1a upstream. There currently isn't a max limit an event probe can be. One could make an event greater than PAGE_SIZE, which makes the event useless because if it's bigger than the max event that can be recorded into the ring buffer, then it will never be recorded. An event probe should never need to be greater than 3K, so make that the max size. As long as the max is less than the max that can be recorded onto the ring buffer, it should be fine. 
Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Fixes: 93ccae7a22274 ("tracing/kprobes: Support basic types on dynamic events") Link: https://patch.msgid.link/20260428122302.706610ba@gandalf.local.home Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_probe.c | 6 ++++++ kernel/trace/trace_probe.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d7adbf1536c8b..d94d374b9d804 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1523,6 +1523,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, parg->offset = *size; *size += parg->type->size * (parg->count ?: 1); + if (*size > MAX_PROBE_EVENT_SIZE) { + ret = -E2BIG; + trace_probe_log_err(ctx->offset, EVENT_TOO_BIG); + goto fail; + } + if (parg->count) { len = strlen(parg->type->fmttype) + 6; parg->fmt = kmalloc(len, GFP_KERNEL); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 08b5bda24da22..6149c4850382c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -38,6 +38,7 @@ #define MAX_BTF_ARGS_LEN 128 #define MAX_DENTRY_ARGS_LEN 256 #define MAX_STRING_SIZE PATH_MAX +#define MAX_PROBE_EVENT_SIZE 3072 /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" @@ -561,7 +562,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_TYPE4STR, "This type does not fit for string."),\ C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ C(TOO_MANY_ARGS, "Too many arguments are specified"), \ - C(TOO_MANY_EARGS, "Too many entry arguments specified"), + C(TOO_MANY_EARGS, "Too many entry arguments specified"), \ + C(EVENT_TOO_BIG, "Event too big (too many fields?)"), #undef C #define C(a, b) TP_ERR_##a From 9eccdb76ad5e69cdf83563853d171614bc15c13e Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Thu, 12 Feb 
2026 16:57:50 +0800 Subject: [PATCH 420/554] clk: imx: imx8-acm: fix flags for acm clocks commit f2c2fc93b4a3efdfcf3805ab74741826d343ff2c upstream. Currently, the flags for the ACM clocks are set to 0. This configuration causes the fsl-sai audio driver to fail when attempting to set the sysclk, returning an EINVAL error. The following error messages highlight the issue: fsl-sai 59090000.sai: ASoC: error at snd_soc_dai_set_sysclk on 59090000.sai: -22 imx-hdmi sound-hdmi: failed to set cpu sysclk: -22 By setting the flag CLK_SET_RATE_NO_REPARENT, we signal that the ACM driver does not support reparenting and instead relies on the clock tree as defined in the device tree. This change resolves the issue with the fsl-sai audio driver. CC: stable@vger.kernel.org Fixes: d3a0946d7ac9 ("clk: imx: imx8: add audio clock mux driver") Signed-off-by: Stefan Eichenberger Signed-off-by: Shengjiu Wang Reviewed-by: Peng Fan Link: https://patch.msgid.link/20260212085750.3253187-1-shengjiu.wang@nxp.com Signed-off-by: Abel Vesa Signed-off-by: Greg Kroah-Hartman --- drivers/clk/imx/clk-imx8-acm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx8-acm.c b/drivers/clk/imx/clk-imx8-acm.c index 790f7e44b11e2..07dca6f31cf8e 100644 --- a/drivers/clk/imx/clk-imx8-acm.c +++ b/drivers/clk/imx/clk-imx8-acm.c @@ -371,7 +371,8 @@ static int imx8_acm_clk_probe(struct platform_device *pdev) for (i = 0; i < priv->soc_data->num_sels; i++) { hws[sels[i].clkid] = devm_clk_hw_register_mux_parent_data_table(dev, sels[i].name, sels[i].parents, - sels[i].num_parents, 0, + sels[i].num_parents, + CLK_SET_RATE_NO_REPARENT, base + sels[i].reg, sels[i].shift, sels[i].width, 0, NULL, NULL); From a0780aeea166a7cf4706c45af4cadbb2a43a1fc9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 24 Feb 2026 09:35:25 +0000 Subject: [PATCH 421/554] clk: microchip: mpfs-ccc: fix out of bounds access during output registration commit 2f7ae8ab6aa73daaf080d5332110357c29df9c36 
upstream. UBSAN reported an out of bounds access during registration of the last two outputs. This out of bounds access occurs because space is only allocated in the hws array for two PLLs and the four output dividers that each has, but the defined IDs contain two DLLS and their two outputs each, which are not supported by the driver. The ID order is PLLs -> DLLs -> PLL outputs -> DLL outputs. Decrement the PLL output IDs by two while adding them to the array to avoid the problem. Fixes: d39fb172760e ("clk: microchip: add PolarFire SoC fabric clock support") CC: stable@vger.kernel.org Reviewed-by: Brian Masney Signed-off-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- drivers/clk/microchip/clk-mpfs-ccc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c index 3a3ea2d142f8a..0a76a1aaa50f7 100644 --- a/drivers/clk/microchip/clk-mpfs-ccc.c +++ b/drivers/clk/microchip/clk-mpfs-ccc.c @@ -178,7 +178,7 @@ static int mpfs_ccc_register_outputs(struct device *dev, struct mpfs_ccc_out_hw_ return dev_err_probe(dev, ret, "failed to register clock id: %d\n", out_hw->id); - data->hw_data.hws[out_hw->id] = &out_hw->divider.hw; + data->hw_data.hws[out_hw->id - 2] = &out_hw->divider.hw; } return 0; @@ -234,6 +234,10 @@ static int mpfs_ccc_probe(struct platform_device *pdev) unsigned int num_clks; int ret; + /* + * If DLLs get added here, mpfs_ccc_register_outputs() currently packs + * sparse clock IDs in the hws array + */ num_clks = ARRAY_SIZE(mpfs_ccc_pll_clks) + ARRAY_SIZE(mpfs_ccc_pll0out_clks) + ARRAY_SIZE(mpfs_ccc_pll1out_clks); From 54a44bbead390d95c68655df9ca6d6cb1fceac78 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Wed, 11 Mar 2026 11:47:09 +0530 Subject: [PATCH 422/554] cpuidle: powerpc: avoid double clear when breaking snooze commit 64ed1e3e728afb57ba9acb59e69de930ead847d9 upstream. snooze_loop is done often in any system which has fair bit of idle time. 
So it qualifies for even micro-optimizations. When breaking the snooze due to timeout, TIF_POLLING_NRFLAG is cleared twice. Clearing the bit invokes atomics. Avoid double clear and thereby avoid one atomic write. dev->poll_time_limit indicates whether the loop was broken due to timeout. Use that instead of defining a new variable. Fixes: 7ded429152e8 ("cpuidle: powerpc: no memory barrier after break from idle") Cc: stable@vger.kernel.org Reviewed-by: Mukesh Kumar Chaurasiya (IBM) Signed-off-by: Shrikanth Hegde Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260311061709.1230440-1-sshegde@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-powernv.c | 5 ++++- drivers/cpuidle/cpuidle-pseries.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 9ebedd972df0b..b89e7111e7b8c 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -95,7 +95,10 @@ static int snooze_loop(struct cpuidle_device *dev, HMT_medium(); ppc64_runlatch_on(); - clear_thread_flag(TIF_POLLING_NRFLAG); + + /* Avoid double clear when breaking */ + if (!dev->poll_time_limit) + clear_thread_flag(TIF_POLLING_NRFLAG); local_irq_disable(); diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index f68c65f1d023f..864dd5d6e627b 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -64,7 +64,10 @@ int snooze_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); + + /* Avoid double clear when breaking */ + if (!dev->poll_time_limit) + clear_thread_flag(TIF_POLLING_NRFLAG); raw_local_irq_disable(); From 65aabf8896687354e0628feb7298c39232e133e3 Mon Sep 17 00:00:00 2001 From: Tommaso Soncin Date: Wed, 29 Apr 2026 18:08:57 +0200 Subject: [PATCH 423/554] ASoC: amd: yc: Add HP OMEN Gaming Laptop 16-ap0xxx product line 
in quirk table commit d63c219b7ff39f897da10c160a2edef76320f16c upstream. Add a DMI quirk for the HP OMEN Gaming Laptop 16-ap0xxx line fixing the issue where the internal microphone was not detected. Cc: stable@vger.kernel.org Signed-off-by: Tommaso Soncin Link: https://patch.msgid.link/20260429160858.538986-1-soncintommaso@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 4c0acdad13ea1..d9f145b634f35 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -52,6 +52,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15-fc0xxx"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN Gaming Laptop 16-ap0xxx"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -654,6 +661,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8EE4"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8E35"), + } + }, { .driver_data = &acp6x_card, .matches = { From 3a5287c47b13b530e7217efc7786b5a248d5ced0 Mon Sep 17 00:00:00 2001 From: Li Jian Date: Fri, 17 Apr 2026 18:53:14 +0800 Subject: [PATCH 424/554] ASoC: ES8389: convert to devm_clk_get_optional() to get clock commit 8ed3311131077712cdd0b3afec6909b9388ad3e4 upstream. When enabling ES8390 via ACPI description, es8389 would fail to obtain a clock source, causing the driver to fail to initialize. This was not an issue with older kernels, but since commit abae8e57e49a ("clk: generalize devm_clk_get() a bit"), devm_clk_get() would return an error pointer when a clock source was not detected (instead of falling back to a static clock), causing the driver to fail early. 
Use devm_clk_get_optional() instead to return to the previous behaviour, allowing the use of a static clock source. Cc: stable@vger.kernel.org Signed-off-by: Li Jian Link: https://patch.msgid.link/tencent_7C78374FB9F4B3A37101E5C719715D8BC40A@qq.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/es8389.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8389.c b/sound/soc/codecs/es8389.c index 6e4c75d288ef9..8960969cfbaa1 100644 --- a/sound/soc/codecs/es8389.c +++ b/sound/soc/codecs/es8389.c @@ -827,7 +827,7 @@ static int es8389_probe(struct snd_soc_component *component) es8389->mclk_src = ES8389_MCLK_SOURCE; } - es8389->mclk = devm_clk_get(component->dev, "mclk"); + es8389->mclk = devm_clk_get_optional(component->dev, "mclk"); if (IS_ERR(es8389->mclk)) return dev_err_probe(component->dev, PTR_ERR(es8389->mclk), "ES8389 is unable to get mclk\n"); From 846fcce3a6bbe34b01e1fe87591db4302d780430 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Mon, 16 Mar 2026 14:05:45 -0400 Subject: [PATCH 425/554] ASoC: fsl_easrc: fix comment typo commit 804dce6c73fdfa44184ee4e8b09abad7f5da408f upstream. The file contains a spelling error in a source comment (funciton). Typos in comments reduce readability and make text searches less reliable for developers and maintainers. Replace 'funciton' with 'function' in the affected comment. This is a comment-only cleanup and does not change behavior. 
Fixes: 955ac624058f ("ASoC: fsl_easrc: Add EASRC ASoC CPU DAI drivers") Cc: stable@vger.kernel.org Signed-off-by: Joseph Salisbury Link: https://patch.msgid.link/20260316180545.144032-1-joseph.salisbury@oracle.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_easrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index d86f01fbecd4a..8e52e43a8ba85 100644 --- a/sound/soc/fsl/fsl_easrc.c +++ b/sound/soc/fsl/fsl_easrc.c @@ -1286,7 +1286,7 @@ static int fsl_easrc_request_context(int channels, struct fsl_asrc_pair *ctx) /* * Release the context * - * This funciton is mainly doing the revert thing in request context + * This function is mainly doing the revert thing in request context */ static void fsl_easrc_release_context(struct fsl_asrc_pair *ctx) { From 4e550b59315d6dabda09727d53d42c51d48453ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Mon, 27 Apr 2026 23:38:41 -0300 Subject: [PATCH 426/554] ASoC: Intel: bytcr_wm5102: Fix MCLK leak on platform_clock_control error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13d30682e8dee191ac04e93642f0372a723e8b0c upstream. If byt_wm5102_prepare_and_enable_pll1() fails in the SND_SOC_DAPM_EVENT_ON() path, platform_clock_control() returns after clk_prepare_enable(priv->mclk) without disabling the clock again. This leaks an MCLK enable reference on failed power-up attempts. Add the missing clk_disable_unprepare() on the error path, matching the unwind used by the other Intel platform_clock_control() implementations. 
Fixes: 9a87fc1e0619 ("ASoC: Intel: bytcr_wm5102: Add machine driver for BYT/WM5102") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Reviewed-by: Cezary Rojewski Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260427-bytcr-wm5102-mclk-leak-v1-1-02b96d08e99c@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/bytcr_wm5102.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 02b84c877e5f3..21979a6eaa0ff 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -171,6 +171,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, ret = byt_wm5102_prepare_and_enable_pll1(codec_dai, 48000); if (ret) { dev_err(card->dev, "Error setting codec sysclk: %d\n", ret); + clk_disable_unprepare(priv->mclk); return ret; } } else { From 6d43355a5b9efaf5c5d3734cad82ee2ddafe4b43 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Apr 2026 08:11:10 +0000 Subject: [PATCH 427/554] ASoC: qcom: q6apm-dai: reset queue ptr on trigger stop commit cab45ab95ce7600fc0ff84585c77fd45b7b0d67c upstream. Reset queue pointer on SNDRV_PCM_TRIGGER_STOP event to be inline with resetting appl_ptr. Without this we will end up with a queue_ptr out of sync and driver could try to send data that is not ready yet. Fix this by resetting the queue_ptr. 
Fixes: 3d4a4411aa8bb ("ASoC: q6apm-dai: schedule all available frames to avoid dsp under-runs") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260402081118.348071-6-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/qdsp6/q6apm-dai.c | 1 + sound/soc/qcom/qdsp6/q6apm.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index a188fffc705e7..54c1a33f9f937 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -323,6 +323,7 @@ static int q6apm_dai_trigger(struct snd_soc_component *component, case SNDRV_PCM_TRIGGER_STOP: /* TODO support be handled via SoftPause Module */ prtd->state = Q6APM_STREAM_STOPPED; + prtd->queue_ptr = 0; break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index 93d782b9c2251..633f57de5a29e 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c @@ -225,6 +225,8 @@ int q6apm_map_memory_regions(struct q6apm_graph *graph, unsigned int dir, phys_a mutex_lock(&graph->lock); + data->dsp_buf = 0; + if (data->buf) { mutex_unlock(&graph->lock); return 0; From 7cab9f2ad51c858263da836baebad050a1bc7914 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Apr 2026 08:11:09 +0000 Subject: [PATCH 428/554] ASoC: qcom: q6apm-lpass-dai: Fix multiple graph opens commit 69acc488aaf39d0ddf6c3cf0e47c1873d39919a2 upstream. As prepare can be called multiple times, this can result in multiple graph opens for playback path. This will result in memory leaks, fix this by adding a check before opening. 
Fixes: be1fae62cf25 ("ASoC: q6apm-lpass-dai: close graph on prepare errors") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260402081118.348071-5-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/qdsp6/q6apm-lpass-dais.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 5be37eeea329f..ba64117b8cfe1 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c @@ -181,7 +181,7 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s * It is recommend to load DSP with source graph first and then sink * graph, so sequence for playback and capture will be different */ - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && dai_data->graph[dai->id] == NULL) { graph = q6apm_graph_open(dai->dev, NULL, dai->dev, graph_id); if (IS_ERR(graph)) { dev_err(dai->dev, "Failed to open graph (%d)\n", graph_id); From 48102d8890902f8f011757f294e57a9b7d8d2621 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Apr 2026 08:11:07 +0000 Subject: [PATCH 429/554] ASoC: qcom: q6apm: remove child devices when apm is removed commit 4a0e1bcc98f7281d1605768bd2fe71eacc34f9b7 upstream. looks like q6apm driver does not remove the child driver q6apm-dai and q6apm-bedais when this driver is removed. Fix this by depopulating them in remove callback. With this change when the dsp is shutdown all the devices associated with q6apm will now be removed. 
Fixes: 5477518b8a0e ("ASoC: qdsp6: audioreach: add q6apm support") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260402081118.348071-3-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/qdsp6/q6apm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index 633f57de5a29e..d425cf1bcbb42 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c @@ -784,6 +784,7 @@ static int apm_probe(gpr_device_t *gdev) static void apm_remove(gpr_device_t *gdev) { + of_platform_depopulate(&gdev->dev); snd_soc_unregister_component(&gdev->dev); } From 9a060970fd7b5e1c561e4ce73cb9949e4269a738 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Wed, 1 Apr 2026 18:56:19 +0800 Subject: [PATCH 430/554] btrfs: fix double free in create_space_info() error path commit 3f487be81292702a59ea9dbc4088b3360a50e837 upstream. When kobject_init_and_add() fails, the call chain is: create_space_info() -> btrfs_sysfs_add_space_info_type() -> kobject_init_and_add() -> failure -> kobject_put(&space_info->kobj) -> space_info_release() -> kfree(space_info) Then control returns to create_space_info(): btrfs_sysfs_add_space_info_type() returns error -> goto out_free -> kfree(space_info) This causes a double free. Keep the direct kfree(space_info) for the earlier failure path, but after btrfs_sysfs_add_space_info_type() has called kobject_put(), let the kobject release callback handle the cleanup. 
Fixes: a11224a016d6d ("btrfs: fix memory leaks in create_space_info() error paths") CC: stable@vger.kernel.org # 6.19+ Reviewed-by: Qu Wenruo Signed-off-by: Guangshuo Li Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/space-info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 194f590201658..a815308e2db91 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -311,7 +311,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags) ret = btrfs_sysfs_add_space_info_type(info, space_info); if (ret) - goto out_free; + return ret; list_add(&space_info->list, &info->space_info); if (flags & BTRFS_BLOCK_GROUP_DATA) From fb388eb58c1ba047ccabc33901839acfecadcf49 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 9 Apr 2026 15:46:51 +0100 Subject: [PATCH 431/554] btrfs: fix missing last_unlink_trans update when removing a directory commit 999757231c49376cd1a37308d2c8c4c9932571e1 upstream. When removing a directory we are not updating its last_unlink_trans field, which can result in incorrect fsync behaviour in case some one fsyncs the directory after it was removed because it's holding a file descriptor on it. Example scenario: mkdir /mnt/dir1 mkdir /mnt/dir1/dir2 mkdir /mnt/dir3 sync -f /mnt # Do some change to the directory and fsync it. chmod 700 /mnt/dir1 xfs_io -c fsync /mnt/dir1 # Move dir2 out of dir1 so that dir1 becomes empty. 
mv /mnt/dir1/dir2 /mnt/dir3/ open fd on /mnt/dir1 call rmdir(2) on path "/mnt/dir1" fsync fd When attempting to mount the filesystem, the log replay will fail with an -EIO error and dmesg/syslog has the following: [445771.626482] BTRFS info (device dm-0): first mount of filesystem 0368bbea-6c5e-44b5-b409-09abe496e650 [445771.626486] BTRFS info (device dm-0): using crc32c checksum algorithm [445771.627912] BTRFS info (device dm-0): start tree-log replay [445771.628335] page: refcount:2 mapcount:0 mapping:0000000061443ddc index:0x1d00 pfn:0x7072a5 [445771.629453] memcg:ffff89f400351b00 [445771.629892] aops:btree_aops [btrfs] ino:1 [445771.630737] flags: 0x17fffc00000402a(uptodate|lru|private|writeback|node=0|zone=2|lastcpupid=0x1ffff) [445771.632359] raw: 017fffc00000402a fffff47284d950c8 fffff472907b7c08 ffff89f458e412b8 [445771.633713] raw: 0000000000001d00 ffff89f6c51d1a90 00000002ffffffff ffff89f400351b00 [445771.635029] page dumped because: eb page dump [445771.635825] BTRFS critical (device dm-0): corrupt leaf: root=5 block=30408704 slot=10 ino=258, invalid nlink: has 2 expect no more than 1 for dir [445771.638088] BTRFS info (device dm-0): leaf 30408704 gen 10 total ptrs 17 free space 14878 owner 5 [445771.638091] BTRFS info (device dm-0): refs 4 lock_owner 0 current 3581087 [445771.638094] item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160 [445771.638097] inode generation 3 transid 9 size 16 nbytes 16384 [445771.638098] block group 0 mode 40755 links 1 uid 0 gid 0 [445771.638100] rdev 0 sequence 2 flags 0x0 [445771.638102] atime 1775744884.0 [445771.660056] ctime 1775744885.645502983 [445771.660058] mtime 1775744885.645502983 [445771.660060] otime 1775744884.0 [445771.660062] item 1 key (256 INODE_REF 256) itemoff 16111 itemsize 12 [445771.660064] index 0 name_len 2 [445771.660066] item 2 key (256 DIR_ITEM 1843588421) itemoff 16077 itemsize 34 [445771.660068] location key (259 1 0) type 2 [445771.660070] transid 9 data_len 0 name_len 4 [445771.660075] 
item 3 key (256 DIR_ITEM 2363071922) itemoff 16043 itemsize 34 [445771.660076] location key (257 1 0) type 2 [445771.660077] transid 9 data_len 0 name_len 4 [445771.660078] item 4 key (256 DIR_INDEX 2) itemoff 16009 itemsize 34 [445771.660079] location key (257 1 0) type 2 [445771.660080] transid 9 data_len 0 name_len 4 [445771.660081] item 5 key (256 DIR_INDEX 3) itemoff 15975 itemsize 34 [445771.660082] location key (259 1 0) type 2 [445771.660083] transid 9 data_len 0 name_len 4 [445771.660084] item 6 key (257 INODE_ITEM 0) itemoff 15815 itemsize 160 [445771.660086] inode generation 9 transid 9 size 8 nbytes 0 [445771.660087] block group 0 mode 40777 links 1 uid 0 gid 0 [445771.660088] rdev 0 sequence 2 flags 0x0 [445771.660089] atime 1775744885.641174097 [445771.660090] ctime 1775744885.645502983 [445771.660091] mtime 1775744885.645502983 [445771.660105] otime 1775744885.641174097 [445771.660106] item 7 key (257 INODE_REF 256) itemoff 15801 itemsize 14 [445771.660107] index 2 name_len 4 [445771.660108] item 8 key (257 DIR_ITEM 2676584006) itemoff 15767 itemsize 34 [445771.660109] location key (258 1 0) type 2 [445771.660110] transid 9 data_len 0 name_len 4 [445771.660111] item 9 key (257 DIR_INDEX 2) itemoff 15733 itemsize 34 [445771.660112] location key (258 1 0) type 2 [445771.660113] transid 9 data_len 0 name_len 4 [445771.660114] item 10 key (258 INODE_ITEM 0) itemoff 15573 itemsize 160 [445771.660115] inode generation 9 transid 10 size 0 nbytes 0 [445771.660116] block group 0 mode 40755 links 2 uid 0 gid 0 [445771.660117] rdev 0 sequence 0 flags 0x0 [445771.660118] atime 1775744885.645502983 [445771.660119] ctime 1775744885.645502983 [445771.660120] mtime 1775744885.645502983 [445771.660121] otime 1775744885.645502983 [445771.660122] item 11 key (258 INODE_REF 257) itemoff 15559 itemsize 14 [445771.660123] index 2 name_len 4 [445771.660124] item 12 key (258 INODE_REF 259) itemoff 15545 itemsize 14 [445771.660125] index 2 name_len 4 [445771.660126] item 13 
key (259 INODE_ITEM 0) itemoff 15385 itemsize 160 [445771.660127] inode generation 9 transid 10 size 8 nbytes 0 [445771.660128] block group 0 mode 40755 links 1 uid 0 gid 0 [445771.660129] rdev 0 sequence 1 flags 0x0 [445771.660130] atime 1775744885.645502983 [445771.660130] ctime 1775744885.645502983 [445771.660131] mtime 1775744885.645502983 [445771.660132] otime 1775744885.645502983 [445771.660133] item 14 key (259 INODE_REF 256) itemoff 15371 itemsize 14 [445771.660134] index 3 name_len 4 [445771.660135] item 15 key (259 DIR_ITEM 2676584006) itemoff 15337 itemsize 34 [445771.660136] location key (258 1 0) type 2 [445771.660137] transid 10 data_len 0 name_len 4 [445771.660138] item 16 key (259 DIR_INDEX 2) itemoff 15303 itemsize 34 [445771.660139] location key (258 1 0) type 2 [445771.660140] transid 10 data_len 0 name_len 4 [445771.660144] BTRFS error (device dm-0): block=30408704 write time tree block corruption detected [445771.661650] ------------[ cut here ]------------ [445771.662358] WARNING: fs/btrfs/disk-io.c:326 at btree_csum_one_bio+0x217/0x230 [btrfs], CPU#8: mount/3581087 [445771.663588] Modules linked in: btrfs f2fs xfs (...) [445771.671229] CPU: 8 UID: 0 PID: 3581087 Comm: mount Tainted: G W 7.0.0-rc6-btrfs-next-230+ #2 PREEMPT(full) [445771.672575] Tainted: [W]=WARN [445771.672987] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [445771.674460] RIP: 0010:btree_csum_one_bio+0x217/0x230 [btrfs] [445771.675222] Code: 89 44 24 (...) 
[445771.677364] RSP: 0018:ffffd23882247660 EFLAGS: 00010246 [445771.678029] RAX: 0000000000000000 RBX: ffff89f6c51d1a90 RCX: 0000000000000000 [445771.678975] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff89f406020000 [445771.679983] RBP: ffff89f821204000 R08: 0000000000000000 R09: 00000000ffefffff [445771.680905] R10: ffffd23882247448 R11: 0000000000000003 R12: ffffd23882247668 [445771.681978] R13: ffff89f458e40fc0 R14: ffff89f737f4f500 R15: ffff89f737f4f500 [445771.682912] FS: 00007f0447a98840(0000) GS:ffff89fb9771d000(0000) knlGS:0000000000000000 [445771.684393] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [445771.685230] CR2: 00007f0447bf1330 CR3: 000000017cb02002 CR4: 0000000000370ef0 [445771.686273] Call Trace: [445771.686646] [445771.686969] btrfs_submit_bbio+0x83f/0x860 [btrfs] [445771.687750] ? write_one_eb+0x28f/0x340 [btrfs] [445771.688428] btree_writepages+0x2e3/0x550 [btrfs] [445771.689180] ? kmem_cache_alloc_noprof+0x12a/0x490 [445771.689963] ? alloc_extent_state+0x19/0x120 [btrfs] [445771.690801] ? kmem_cache_free+0x135/0x380 [445771.691328] ? preempt_count_add+0x69/0xa0 [445771.691831] ? set_extent_bit+0x252/0x8e0 [btrfs] [445771.692468] ? xas_load+0x9/0xc0 [445771.692873] ? xas_find+0x14d/0x1a0 [445771.693304] do_writepages+0xc6/0x160 [445771.693756] filemap_writeback+0xb8/0xe0 [445771.694274] btrfs_write_marked_extents+0x61/0x170 [btrfs] [445771.694999] btrfs_write_and_wait_transaction+0x4e/0xc0 [btrfs] [445771.695818] btrfs_commit_transaction+0x5c8/0xd10 [btrfs] [445771.696530] ? kmem_cache_free+0x135/0x380 [445771.697120] ? release_extent_buffer+0x34/0x160 [btrfs] [445771.697786] btrfs_recover_log_trees+0x7be/0x7e0 [btrfs] [445771.698525] ? __pfx_replay_one_buffer+0x10/0x10 [btrfs] [445771.699206] open_ctree+0x11e5/0x1810 [btrfs] [445771.699776] btrfs_get_tree.cold+0xb/0x162 [btrfs] [445771.700463] ? fscontext_read+0x165/0x180 [445771.701146] ? 
rw_verify_area+0x50/0x180 [445771.701866] vfs_get_tree+0x25/0xd0 [445771.702491] vfs_cmd_create+0x59/0xe0 [445771.703125] __do_sys_fsconfig+0x303/0x610 [445771.703603] do_syscall_64+0xe9/0xf20 [445771.703974] entry_SYSCALL_64_after_hwframe+0x76/0x7e [445771.704700] RIP: 0033:0x7f0447cbd4aa [445771.705108] Code: 73 01 c3 (...) [445771.707263] RSP: 002b:00007ffc4e528318 EFLAGS: 00000246 ORIG_RAX: 00000000000001af [445771.708107] RAX: ffffffffffffffda RBX: 00005561585d8c20 RCX: 00007f0447cbd4aa [445771.708931] RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 [445771.709744] RBP: 00005561585d9120 R08: 0000000000000000 R09: 0000000000000000 [445771.710674] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [445771.711477] R13: 00007f0447e4f580 R14: 00007f0447e5126c R15: 00007f0447e36a23 [445771.712277] [445771.712541] ---[ end trace 0000000000000000 ]--- [445771.713382] BTRFS error (device dm-0): error while writing out transaction: -5 [445771.714679] BTRFS warning (device dm-0): Skipping commit of aborted transaction. [445771.715562] BTRFS error (device dm-0 state A): Transaction aborted (error -5) [445771.716459] BTRFS: error (device dm-0 state A) in cleanup_transaction:2068: errno=-5 IO failure [445771.717936] BTRFS error (device dm-0 state EA): failed to recover log trees with error: -5 [445771.719681] BTRFS error (device dm-0 state EA): open_ctree failed: -5 The problem is that such a fsync should have result in a fallback to a transaction commit, but that did not happen because through the btrfs_rmdir() we never update the directory's last_unlink_trans field. Any inode that had a link removed must have its last_unlink_trans updated to the ID of transaction used for the operation, otherwise fsync and log replay will not work correctly. btrfs_rmdir() calls btrfs_unlink_inode() and through that call chain we never call btrfs_record_unlink_dir() in order to update last_unlink_trans. 
However btrfs_unlink(), which is used for unlinking regular files, calls btrfs_record_unlink_dir() and then calls btrfs_unlink_inode(). So fix this by moving the call to btrfs_record_unlink_dir() from btrfs_unlink() to btrfs_unlink_inode(). A test case for fstests will follow soon. Reported-by: Slava0135 Link: https://lore.kernel.org/linux-btrfs/CAAJYhww5ov62Hm+n+tmhcL-e_4cBobg+OWogKjOJxVUXivC=MQ@mail.gmail.com/ CC: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7fe868a6a51b4..feaa6de8a90f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4826,6 +4826,8 @@ static int btrfs_rmdir(struct inode *vfs_dir, struct dentry *dentry) if (ret) goto out; + btrfs_record_unlink_dir(trans, dir, inode, false); + /* now the directory is empty */ ret = btrfs_unlink_inode(trans, dir, inode, &fname.disk_name); if (!ret) From 85311a585a26640760cd0f3349ab9f2905691044 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 Apr 2026 19:56:44 +0200 Subject: [PATCH 432/554] dm-thin: fix metadata refcount underflow commit 09a65adc7d8bbfce06392cb6d375468e2728ead5 upstream. There's a bug in dm-thin in the function rebalance_children. If the internal btree node has one entry, the code tries to copy all btree entries from the node's child to the node itself and then decrement the child's reference count. If the child node is shared (it has reference count > 1), we won't free it, so there would be two pointers to each of the grandchildren nodes. But the reference counts of the grandchildren is not increased, thus the reference count doesn't match the number of pointers that point to the grandchildren. This results in "device mapper: space map common: unable to decrement block" errors. Fix this bug by incrementing reference counts on the grandchildren if the btree node is shared. 
Signed-off-by: Mikulas Patocka Fixes: 3241b1d3e0aa ("dm: add persistent data library") Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree-remove.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 942cd47eb52da..aeec5b9a1dd5c 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -490,12 +490,20 @@ static int rebalance_children(struct shadow_spine *s, if (le32_to_cpu(n->header.nr_entries) == 1) { struct dm_block *child; + int is_shared; dm_block_t b = value64(n, 0); + r = dm_tm_block_is_shared(info->tm, b, &is_shared); + if (r) + return r; + r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); if (r) return r; + if (is_shared) + inc_children(info->tm, dm_block_data(child), vt); + memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); From cb7c6a89e5d8d1e5d7a1214ae98558141e566c9e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 16 Mar 2026 15:04:15 +0100 Subject: [PATCH 433/554] dm: don't report warning when doing deferred remove commit b7cce3e2cca9cd78418f3c3784474b778e7996fe upstream. If dm_hash_remove_all was called from dm_deferred_remove, it would write a warning "remove_all left %d open device(s)" if there are some other devices active. The warning is bogus, so let's disable it in this case. 
Signed-off-by: Mikulas Patocka Reported-by: Zdenek Kabelac Cc: stable@vger.kernel.org Fixes: 2c140a246dc0 ("dm: allow remove to be deferred") Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4165fef4c1707..dd3fc95537cb5 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -384,7 +384,7 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool up_write(&_hash_lock); - if (dev_skipped) + if (dev_skipped && !only_deferred) DMWARN("remove_all left %d open device(s)", dev_skipped); } From 5af6a879e915ae7bcd83695c316ebb32e1c61bc2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Apr 2026 17:49:58 +0200 Subject: [PATCH 434/554] dm: fix a buffer overflow in ioctl processing commit 2fa49cc884f6496a915c35621ba4da35649bf159 upstream. Tony Asleson (using Claude) found a buffer overflow in dm-ioctl in the function retrieve_status: 1. The code in retrieve_status checks that the output string fits into the output buffer and writes the output string there 2. Then, the code aligns the "outptr" variable to the next 8-byte boundary: outptr = align_ptr(outptr); 3. The alignment doesn't check overflow, so outptr could point past the buffer end 4. The "for" loop is iterated again, it executes: remaining = len - (outptr - outbuf); 5. If "outptr" points past "outbuf + len", the arithmetic wraps around and the variable "remaining" contains an unusually high number 6. With "remaining" being high, the code writes more data past the end of the buffer Luckily, this bug has no security implications because: 1. Only root can issue device mapper ioctls 2. 
The commonly used libraries that communicate with device mapper (libdevmapper and devicemapper-rs) use buffer size that is aligned to 8 bytes - thus, "outptr = align_ptr(outptr)" can't overshoot the input buffer and the bug can't happen accidentally Reported-by: Tony Asleson Signed-off-by: Mikulas Patocka Reviewed-by: Bryn M. Reeves Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index dd3fc95537cb5..de90feb842ab6 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1341,6 +1341,10 @@ static void retrieve_status(struct dm_table *table, used = param->data_start + (outptr - outbuf); outptr = align_ptr(outptr); + if (!outptr || outptr > outbuf + len) { + param->flags |= DM_BUFFER_FULL_FLAG; + break; + } spec->next = outptr - outbuf; } From 52b109f1b875b912d4ab2c5fdd8c322d47119d9b Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 18 Apr 2026 20:17:37 +0100 Subject: [PATCH 435/554] eventfs: Hold eventfs_mutex and SRCU when remount walks events commit 07004a8c4b572171934390148ee48c4175c77eed upstream. Commit 340f0c7067a9 ("eventfs: Update all the eventfs_inodes from the events descriptor") had eventfs_set_attrs() recurse through ei->children on remount. The walk only holds the rcu_read_lock() taken by tracefs_apply_options() over tracefs_inodes, which is wrong: - list_for_each_entry over ei->children races with the list_del_rcu() in eventfs_remove_rec() -- LIST_POISON1 deref, same shape as d2603279c7d6. - eventfs_inodes are freed via call_srcu(&eventfs_srcu, ...). rcu_read_lock() does not extend an SRCU grace period, so ti->private can be reclaimed under the walk. - The writes to ei->attr race with eventfs_set_attr(), which holds eventfs_mutex. 
Reproducer: while :; do mount -o remount,uid=$((RANDOM%1000)) /sys/kernel/tracing; done & while :; do echo "p:kp submit_bio" > /sys/kernel/tracing/kprobe_events echo > /sys/kernel/tracing/kprobe_events done Wrap the events portion of tracefs_apply_options() in eventfs_remount_lock()/_unlock() that take eventfs_mutex and srcu_read_lock(&eventfs_srcu). eventfs_set_attrs() doesn't sleep so the nested rcu_read_lock() is fine; lockdep_assert_held() pins the contract. Comment in tracefs_drop_inode() said "RCU cycle" -- it is SRCU. Fixes: 340f0c7067a9 ("eventfs: Update all the eventfs_inodes from the events descriptor") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260418191737.10289-1-devnexen@gmail.com Signed-off-by: David Carlier Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- fs/tracefs/event_inode.c | 14 ++++++++++++++ fs/tracefs/inode.c | 5 ++++- fs/tracefs/internal.h | 3 +++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 93c231601c8e2..0d2bc92b760f3 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -250,6 +250,8 @@ static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t { struct eventfs_inode *ei_child; + lockdep_assert_held(&eventfs_mutex); + /* Update events// */ if (WARN_ON_ONCE(level > 3)) return; @@ -912,3 +914,15 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) d_invalidate(dentry); dput(dentry); } + +int eventfs_remount_lock(void) +{ + mutex_lock(&eventfs_mutex); + return srcu_read_lock(&eventfs_srcu); +} + +void eventfs_remount_unlock(int srcu_idx) +{ + srcu_read_unlock(&eventfs_srcu, srcu_idx); + mutex_unlock(&eventfs_mutex); +} diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 35c0cc557fbb7..363a5d7554119 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -336,6 +336,7 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) struct inode *inode = 
d_inode(sb->s_root); struct tracefs_inode *ti; bool update_uid, update_gid; + int srcu_idx; umode_t tmp_mode; /* @@ -360,6 +361,7 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) update_uid = fsi->opts & BIT(Opt_uid); update_gid = fsi->opts & BIT(Opt_gid); + srcu_idx = eventfs_remount_lock(); rcu_read_lock(); list_for_each_entry_rcu(ti, &tracefs_inodes, list) { if (update_uid) { @@ -381,6 +383,7 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) eventfs_remount(ti, update_uid, update_gid); } rcu_read_unlock(); + eventfs_remount_unlock(srcu_idx); } return 0; @@ -426,7 +429,7 @@ static int tracefs_drop_inode(struct inode *inode) * This inode is being freed and cannot be used for * eventfs. Clear the flag so that it doesn't call into * eventfs during the remount flag updates. The eventfs_inode - * gets freed after an RCU cycle, so the content will still + * gets freed after an SRCU cycle, so the content will still * be safe if the iteration is going on now. */ ti->flags &= ~TRACEFS_EVENT_INODE; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index d83c2a25f288e..a4a7f8431affb 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -76,4 +76,7 @@ struct inode *tracefs_get_inode(struct super_block *sb); void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); void eventfs_d_release(struct dentry *dentry); +int eventfs_remount_lock(void); +void eventfs_remount_unlock(int srcu_idx); + #endif /* _TRACEFS_INTERNAL_H */ From 9a484c8149b2096baf911d5d5a68677b30e04f05 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Feb 2026 20:59:20 -0800 Subject: [PATCH 436/554] dm-verity-fec: correctly reject too-small FEC devices commit 2b14e0bb63cc671120e7791658f5c494fc66d072 upstream. Fix verity_fec_ctr() to reject too-small FEC devices by correctly computing the number of parity blocks as 'f->rounds * f->roots'. 
Previously it incorrectly used 'div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT)' which is a much smaller value. Note that the units of 'rounds' are blocks, not bytes. This matches the units of the value returned by dm_bufio_get_device_size(), which are also blocks. A later commit will give 'rounds' a clearer name. Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 5365b3987374a..b2eb2151c8da9 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -657,7 +657,7 @@ int verity_fec_ctr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; - u64 hash_blocks, fec_blocks; + u64 hash_blocks; int ret; if (!verity_fec_is_enabled(v)) { @@ -738,8 +738,7 @@ int verity_fec_ctr(struct dm_verity *v) dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT)); - fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT); - if (dm_bufio_get_device_size(f->bufio) < fec_blocks) { + if (dm_bufio_get_device_size(f->bufio) < f->rounds * f->roots) { ti->error = "FEC device is too small"; return -E2BIG; } From 1919acc5876c18f3e04d2408e39be3584a4f3655 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Feb 2026 20:59:21 -0800 Subject: [PATCH 437/554] dm-verity-fec: correctly reject too-small hash devices commit 4355142245f7e55336dcc005ec03592df4d546f8 upstream. Fix verity_fec_ctr() to reject too-small hash devices by correctly taking hash_start into account. Note that this is necessary because dm-verity doesn't call dm_bufio_set_sector_offset() on the hash device's bufio client (v->bufio). 
Thus, dm_bufio_get_device_size(v->bufio) returns a size relative to 0 rather than hash_start. An alternative fix would be to call dm_bufio_set_sector_offset() on v->bufio, but then all the code that reads from the hash device would have to be adjusted accordingly. Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index b2eb2151c8da9..03e59b85132aa 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -720,7 +720,8 @@ int verity_fec_ctr(struct dm_verity *v) * it to be large enough. */ f->hash_blocks = f->blocks - v->data_blocks; - if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) { + if (dm_bufio_get_device_size(v->bufio) < + v->hash_start + f->hash_blocks) { ti->error = "Hash device is too small for " DM_VERITY_OPT_FEC_BLOCKS; return -E2BIG; From e69da8eeab74b4f4505024c38a17bce060fe7df8 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 17:21:54 -0400 Subject: [PATCH 438/554] isofs: validate Rock Ridge CE continuation extent against volume size commit a36d990f591320e9dd379ab30063ebfe91d47e1f upstream. rock_continue() reads rs->cont_extent verbatim from the Rock Ridge CE record and passes it to sb_bread() without checking that the block number is within the mounted ISO 9660 volume. commit e595447e177b ("[PATCH] rock.c: handle corrupted directories") added cont_offset and cont_size rejection for the CE continuation but did not validate the extent block number itself. commit f54e18f1b831 ("isofs: Fix infinite looping over CE entries") later capped the CE chain length at RR_MAX_CE_ENTRIES = 32 but again left the block number unchecked. 
With a crafted ISO mounted via udisks2 (desktop optical auto-mount) or via CAP_SYS_ADMIN mount, rs->cont_extent can therefore point at an out-of-range block or at blocks belonging to an adjacent filesystem on the same block device. sb_bread() on an out-of-range block returns NULL cleanly via the block layer EIO path, so there is no memory-safety violation. For in-range reads of adjacent- filesystem data, the CE buffer is parsed as Rock Ridge records and only the text of SL sub-records reaches userspace through readlink(), which makes the info-leak channel narrow and difficult to exploit; still, rejecting the malformed CE outright matches the rejection shape already present in the same function for cont_offset and cont_size. Add an ISOFS_SB(sb)->s_nzones bounds check to rock_continue() next to the existing offset/size rejection, printing the same corrupted-directory-entry notice. Fixes: f54e18f1b831 ("isofs: Fix infinite looping over CE entries") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419212155.2169382-2-michael.bommarito@gmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 576498245b9d7..6c104fcb84481 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -101,6 +101,15 @@ static int rock_continue(struct rock_state *rs) goto out; } + if ((unsigned)rs->cont_extent >= ISOFS_SB(rs->inode->i_sb)->s_nzones) { + printk(KERN_NOTICE "rock: corrupted directory entry. 
" + "extent=%u out of volume (nzones=%lu)\n", + (unsigned)rs->cont_extent, + ISOFS_SB(rs->inode->i_sb)->s_nzones); + ret = -EIO; + goto out; + } + if (rs->cont_extent) { struct buffer_head *bh; From afbafeddf23db13fe2edb2d5c0bf4bbb13d7881b Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 17:21:55 -0400 Subject: [PATCH 439/554] isofs: validate block number from NFS file handle in isofs_export_iget commit 24376458138387fb251e782e624c7776e9826796 upstream. isofs_fh_to_dentry() and isofs_fh_to_parent() pass an attacker- controlled block number (ifid->block or ifid->parent_block) from the NFS file handle to isofs_export_iget(), which only rejects block == 0 before calling isofs_iget() and ultimately sb_bread(). A crafted file handle with fh_len sufficient to pass the check added by commit 0405d4b63d08 ("isofs: Prevent the use of too small fid") can still drive the server to read any in-range block on the backing device as if it were an iso_directory_record. That earlier fix was assigned CVE-2025-37780. sb_bread() on an out-of-range block returns NULL cleanly via the EIO path, so there is no memory-safety violation. For in-range reads of adjacent-partition data on the same block device, the unrelated bytes end up in iso_inode_info fields that reach the NFS client as dentry metadata. The deployment surface (isofs exported over NFS from loop-mounted images) is narrow and requires an authenticated NFS peer, but the malformed-file-handle class is reportable as hardening next to the existing CVE-2025-37780 fix. Reject block >= ISOFS_SB(sb)->s_nzones in isofs_export_iget() so the check covers both isofs_fh_to_dentry() and isofs_fh_to_parent() call sites with a single line. 
Fixes: 0405d4b63d08 ("isofs: Prevent the use of too small fid") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419212155.2169382-3-michael.bommarito@gmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 421d247fae523..78f80c1a5c54a 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -24,7 +24,7 @@ isofs_export_iget(struct super_block *sb, { struct inode *inode; - if (block == 0) + if (block == 0 || block >= ISOFS_SB(sb)->s_nzones) return ERR_PTR(-ESTALE); inode = isofs_iget(sb, block, offset); if (IS_ERR(inode)) From 0d96652adc8fd0ce2a02fe66fbebecd682a18fa8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sun, 29 Mar 2026 23:07:55 -0400 Subject: [PATCH 440/554] iommufd: Fix return value of iommufd_fault_fops_write() commit aaca2aa92785a6ab8e3183e7184bca447a99cd76 upstream. copy_from_user() may return number of bytes failed to copy, we should not pass over this number to user space to cheat that write() succeed. Instead, -EFAULT should be returned. 
Link: https://patch.msgid.link/r/20260330030755.12856-1-zhenzhong.duan@intel.com Cc: stable@vger.kernel.org Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Signed-off-by: Zhenzhong Duan Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Shuai Xue Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommufd/eventq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index e23d9ee4fe380..8e45752f500e7 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -187,9 +187,10 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b mutex_lock(&fault->mutex); while (count > done) { - rc = copy_from_user(&response, buf + done, response_size); - if (rc) + if (copy_from_user(&response, buf + done, response_size)) { + rc = -EFAULT; break; + } static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == (int)IOMMU_PAGE_RESP_SUCCESS); From 3ea9ce757bd3de955b56e7bc5672fc479e40b045 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 2 Apr 2026 14:57:24 +0800 Subject: [PATCH 441/554] iommu/vt-d: Block PASID attachment to nested domain with dirty tracking commit cc5bd898ff70710ffc41cd8e5c2741cb64750047 upstream. Kernel lacks dirty tracking support on nested domain attached to PASID, fails the attachment early if nesting parent domain is dirty tracking configured, otherwise dirty pages would be lost. 
Cc: stable@vger.kernel.org Fixes: 67f6f56b5912 ("iommu/vt-d: Add set_dev_pasid callback for nested domain") Suggested-by: Kevin Tian Signed-off-by: Zhenzhong Duan Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20260330101108.12594-2-zhenzhong.duan@intel.com Signed-off-by: Lu Baolu Fixes: 67f6f56b5912 ("iommu/vt-d: Add set_dev_pasid callback for nested domain") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/nested.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 1b6ad9c900a5a..e91b07475fb84 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -154,6 +154,7 @@ static int intel_nested_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct iommu_domain *s2_domain = &dmar_domain->s2_domain->domain; struct intel_iommu *iommu = info->iommu; struct dev_pasid_info *dev_pasid; int ret; @@ -161,10 +162,13 @@ static int intel_nested_set_dev_pasid(struct iommu_domain *domain, if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) return -EOPNOTSUPP; + if (s2_domain->dirty_ops) + return -EINVAL; + if (context_copied(iommu, info->bus, info->devfn)) return -EBUSY; - ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev); + ret = paging_domain_compatible(s2_domain, dev); if (ret) return ret; From cbb032fc7432167df1abc1e4ef70407336ec1a24 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 17 Mar 2026 00:59:16 -0700 Subject: [PATCH 442/554] iommu/arm-smmu-v3: Add a missing dma_wmb() for hitless STE update commit 6fabce53f6b9c2419012a9103e1a46d40888cefa upstream. When writing a new (previously invalid) valid IOPTE to a page table, then installing the page table into an STE hitlesslessly (e.g. 
in S2TTB field), there is a window before an STE invalidation, where the page-table may be accessed by SMMU but the new IOPTE is still siting in the CPU cache. This could occur when we allocate an iommu_domain and immediately install it hitlessly, while there would be no dma_wmb() for the page table memory prior to the earliest point of HW reading the STE. Fix it by adding a dma_wmb() prior to updating the STE. Fixes: 56e1a4cc2588 ("iommu/arm-smmu-v3: Add unit tests for arm_smmu_write_entry") Cc: stable@vger.kernel.org Reported-by: Will Deacon Closes: https://lore.kernel.org/linux-iommu/aXdlnLLFUBwjT0V5@willie-the-truck/ Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1e47da0ce6b9b..10c8a29db8028 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1236,6 +1236,13 @@ void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry, __le64 unused_update[NUM_ENTRY_QWORDS]; u8 used_qword_diff; + /* + * Many of the entry structures have pointers to other structures that + * need to have their updates be visible before any writes of the entry + * happen. + */ + dma_wmb(); + used_qword_diff = arm_smmu_entry_qword_diff(writer, entry, target, unused_update); if (hweight8(used_qword_diff) == 1) { From 8637dfb4c1d8a7026ef681f2477c6de8b71c4003 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Apr 2026 16:19:47 +0200 Subject: [PATCH 443/554] lib/crypto: mpi: Fix integer underflow in mpi_read_raw_from_sgl() commit 8c2f1288250a90a4b5cabed5d888d7e3aeed4035 upstream. Yiming reports an integer underflow in mpi_read_raw_from_sgl() when subtracting "lzeros" from the unsigned "nbytes". 
For this to happen, the scatterlist "sgl" needs to occupy more bytes than the "nbytes" parameter and the first "nbytes + 1" bytes of the scatterlist must be zero. Under these conditions, the while loop iterating over the scatterlist will count more zeroes than "nbytes", subtract the number of zeroes from "nbytes" and cause the underflow. When commit 2d4d1eea540b ("lib/mpi: Add mpi sgl helpers") originally introduced the bug, it couldn't be triggered because all callers of mpi_read_raw_from_sgl() passed a scatterlist whose length was equal to "nbytes". However since commit 63ba4d67594a ("KEYS: asymmetric: Use new crypto interface without scatterlists"), the underflow can now actually be triggered. When invoking a KEYCTL_PKEY_ENCRYPT system call with a larger "out_len" than "in_len" and filling the "in" buffer with zeroes, crypto_akcipher_sync_prep() will create an all-zero scatterlist used for both the "src" and "dst" member of struct akcipher_request and thereby fulfil the conditions to trigger the bug: sys_keyctl() keyctl_pkey_e_d_s() asymmetric_key_eds_op() software_key_eds_op() crypto_akcipher_sync_encrypt() crypto_akcipher_sync_prep() crypto_akcipher_encrypt() rsa_enc() mpi_read_raw_from_sgl() To the user this will be visible as a DoS as the kernel spins forever, causing soft lockup splats as a side effect. Fix it. 
Reported-by: Yiming Qian # off-list Fixes: 2d4d1eea540b ("lib/mpi: Add mpi sgl helpers") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.4+ Reviewed-by: Ignat Korchagin Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/59eca92ff4f87e2081777f1423a0efaaadcfdb39.1776003111.git.lukas@wunner.de Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- lib/crypto/mpi/mpicoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crypto/mpi/mpicoder.c b/lib/crypto/mpi/mpicoder.c index 47f6939599b33..47d0b67113e06 100644 --- a/lib/crypto/mpi/mpicoder.c +++ b/lib/crypto/mpi/mpicoder.c @@ -347,7 +347,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) lzeros = 0; len = 0; while (nbytes > 0) { - while (len && !*buff) { + while (len && !*buff && lzeros < nbytes) { lzeros++; len--; buff++; From 8fbba6829057979149d1b37d65690c037f3ddf4d Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:01 +0100 Subject: [PATCH 444/554] lib/scatterlist: fix length calculations in extract_kvec_to_sg commit 07b7d66e65d9cfe6b9c2c34aa22cfcaac37a5c45 upstream. Patch series "Fix bugs in extract_iter_to_sg()", v3. Fix bugs in the kvec and user variants of extract_iter_to_sg. This series is growing due to useful remarks made by sashiko.dev. The main bugs are: - The length for an sglist entry when extracting from a kvec can exceed the number of bytes in the page. This is obviously not intended. - When extracting a user buffer the sglist is temporarily used as a scratch buffer for extracted page pointers. If the sglist already contains some elements this scratch buffer could overlap with existing entries in the sglist. The series adds test cases to the kunit_iov_iter test that demonstrate all of these bugs. Additionally, there is a memory leak fix for the test itself. The bugs were orignally introduced into kernel v6.3 where the function lived in fs/netfs/iterator.c. 
It was later moved to lib/scatterlist.c in v6.5. Thus the actual fix is only marked for backports to v6.5+. This patch (of 5): When extracting from a kvec to a scatterlist, do not cross page boundaries. The required length was already calculated but not used as intended. Adjust the copied length if the loop runs out of sglist entries without extracting everything. While there, return immediately from extract_iter_to_sg if there are no sglist entries at all. A subsequent commit will add kunit test cases that demonstrate that the patch is necessary. Link: https://lkml.kernel.org/r/20260326214905.818170-1-lk@c--e.de Link: https://lkml.kernel.org/r/20260326214905.818170-2-lk@c--e.de Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Signed-off-by: Christian A. Ehrhardt Cc: David Gow Cc: David Howells Cc: Kees Cook Cc: Petr Mladek Cc: [v6.5+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/scatterlist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 4af1c8b0775a7..ca9f1c14b7b0d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1223,7 +1223,7 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, else page = virt_to_page((void *)kaddr); - sg_set_page(sg, page, len, off); + sg_set_page(sg, page, seg, off); sgtable->nents++; sg++; sg_max--; @@ -1232,6 +1232,7 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, kaddr += PAGE_SIZE; off = 0; } while (len > 0 && sg_max > 0); + ret -= len; if (maxsize <= 0 || sg_max == 0) break; @@ -1385,7 +1386,7 @@ ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, struct sg_table *sgtable, unsigned int sg_max, iov_iter_extraction_t extraction_flags) { - if (maxsize == 0) + if (maxsize == 0 || sg_max == 0) return 0; switch (iov_iter_type(iter)) { From 7cdf18608302fcb55054b0b0f76d0c1cc74af4b1 Mon Sep 17 00:00:00 2001 From: "Christian A. 
Ehrhardt" Date: Thu, 26 Mar 2026 22:49:02 +0100 Subject: [PATCH 445/554] lib/scatterlist: fix temp buffer in extract_user_to_sg() commit 118cf3f55975352ac357fb194405031458186819 upstream. Instead of allocating a temporary buffer for extracted user pages extract_user_to_sg() uses the end of the to be filled scatterlist as a temporary buffer. Fix the calculation of the start address if the scatterlist already contains elements. The unused space starts at sgtable->sgl + sgtable->nents not directly at sgtable->nents and the temporary buffer is placed at the end of this unused space. A subsequent commit will add kunit test cases that demonstrate that the patch is necessary. Pointed out by sashiko.dev on a previous iteration of this series. Link: https://lkml.kernel.org/r/20260326214905.818170-3-lk@c--e.de Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Signed-off-by: Christian A. Ehrhardt Cc: David Howells Cc: David Gow Cc: Kees Cook Cc: Petr Mladek Cc: [v6.5+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/scatterlist.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index ca9f1c14b7b0d..91fe08b7e8869 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1099,8 +1099,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter, size_t len, off; /* We decant the page list into the tail of the scatterlist */ - pages = (void *)sgtable->sgl + - array_size(sg_max, sizeof(struct scatterlist)); + pages = (void *)sg + array_size(sg_max, sizeof(struct scatterlist)); pages -= sg_max; do { From b7df9fbd4869fdfe09a3f501ffd228486521e062 Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Tue, 21 Apr 2026 10:27:01 +0200 Subject: [PATCH 446/554] libceph: Fix slab-out-of-bounds access in auth message processing commit 1c439de70b1c3eb3c6bffa8245c16b9fc318f114 upstream. 
If a (potentially corrupted) message of type CEPH_MSG_AUTH_REPLY contains a positive value in its result field, it is treated as an error code by ceph_handle_auth_reply() and returned to handle_auth_reply(). Thereafter, an attempt is made to send the preallocated message of type CEPH_MSG_AUTH, where the returned value is interpreted as the size of the front segment to send. If the result value in the message is greater than the size of the memory buffer allocated for the front segment, an out-of-bounds access occurs, and the content of the memory region beyond this buffer is sent out. This patch fixes the issue by treating only negative values in the result field as errors. Positive values are therefore treated as success in the same way as a zero value. Additionally, a BUG_ON is added to __send_prepared_auth_request() comparing the len parameter to front_alloc_len to prevent sending the message if it exceeds the bounds of the allocation and to make it easier to catch any logic flaws leading to this. 
Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth.c | 2 +- net/ceph/mon_client.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 23d109cb0c6b2..06d0d73309c22 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -257,7 +257,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, ac->negotiating = false; } - if (result) { + if (result < 0) { pr_err("auth protocol '%s' mauth authentication failed: %d\n", ceph_auth_proto_name(ac->protocol), result); ret = result; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 94a7a82ca4756..ff4311d5764bd 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -174,6 +174,8 @@ int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr) */ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) { + BUG_ON(len > monc->m_auth->front_alloc_len); + monc->pending_auth = 1; monc->m_auth->front.iov_len = len; monc->m_auth->hdr.front_len = cpu_to_le32(len); From 913d556e4bd1b56ed822815655b82c7bb54edc51 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Thu, 16 Apr 2026 11:39:56 +0800 Subject: [PATCH 447/554] md/raid10: fix divide-by-zero in setup_geo() with zero far_copies commit 9aa6d860b0930e2f72795665c42c44252a558a0c upstream. setup_geo() extracts near_copies (nc) and far_copies (fc) from the user-provided layout parameter without checking for zero. When fc=0 with the "improved" far set layout selected, 'geo->far_set_size = disks / fc' triggers a divide-by-zero. Validate nc and fc immediately after extraction, returning -1 if either is zero. 
Fixes: 475901aff158 ("MD RAID10: Improve redundancy for 'far' and 'offset' algorithms (part 1)") Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Link: https://lore.kernel.org/linux-raid/SYBPR01MB7881A5E2556806CC1D318582AF232@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Yu Kuai Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 706037d2a87c4..2815c05d1c9f0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3856,6 +3856,8 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) nc = layout & 255; fc = (layout >> 8) & 255; fo = layout & (1<<16); + if (!nc || !fc) + return -1; geo->raid_disks = disks; geo->near_copies = nc; geo->far_copies = fc; From c77c82935c0a6869e73ef581621ab1c1900e14c7 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 8 Apr 2026 17:18:14 +0300 Subject: [PATCH 448/554] nvme-apple: drop invalid put of admin queue reference count commit ba9d308ccd6732dd97ed8080d834a4a89e758e14 upstream. Commit 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") moved the admin queue reference ->put call into nvme_free_ctrl() - a controller device release callback performed for every nvme driver doing nvme_init_ctrl(). nvme-apple sets refcount of the admin queue to 1 at allocation during the probe function and then puts it twice now: nvme_free_ctrl() blk_put_queue(ctrl->admin_q) // #1 ->free_ctrl() apple_nvme_free_ctrl() blk_put_queue(anv->ctrl.admin_q) // #2 Note that there is a commit 941f7298c70c ("nvme-apple: remove an extra queue reference") which intended to drop taking an extra admin queue reference. Looks like at that moment it accidentally fixed a refcount leak, which existed since the driver's introduction. There were two ->get calls at driver's probe function and a single ->put inside apple_nvme_free_ctrl(). 
However now after commit 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") the refcount is imbalanced again. Fix it by removing extra ->put call from apple_nvme_free_ctrl(). anv->dev and ctrl->dev point to the same device, so use ctrl->dev directly for simplification. Compile tested only. Found by Linux Verification Center (linuxtesting.org). Fixes: 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Fedor Pchelkin Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/apple.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 63819304290a1..86de41d0e7426 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1267,11 +1267,7 @@ static int apple_nvme_get_address(struct nvme_ctrl *ctrl, char *buf, int size) static void apple_nvme_free_ctrl(struct nvme_ctrl *ctrl) { - struct apple_nvme *anv = ctrl_to_apple_nvme(ctrl); - - if (anv->ctrl.admin_q) - blk_put_queue(anv->ctrl.admin_q); - put_device(anv->dev); + put_device(ctrl->dev); } static const struct nvme_ctrl_ops nvme_ctrl_ops = { From 67e1aaf93b495c2f10bc8a5fbba575fbb7f449b6 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 8 Apr 2026 00:51:31 -0700 Subject: [PATCH 449/554] nvmet-tcp: fix race between ICReq handling and queue teardown commit 5293a8882c549fab4a878bc76b0b6c951f980a61 upstream. nvmet_tcp_handle_icreq() updates queue->state after sending an Initialization Connection Response (ICResp), but it does so without serializing against target-side queue teardown. If an NVMe/TCP host sends an Initialization Connection Request (ICReq) and immediately closes the connection, target-side teardown may start in softirq context before io_work drains the already buffered ICReq. 
In that case, nvmet_tcp_schedule_release_queue() sets queue->state to NVMET_TCP_Q_DISCONNECTING and drops the queue reference under state_lock. If io_work later processes that ICReq, nvmet_tcp_handle_icreq() can still overwrite the state back to NVMET_TCP_Q_LIVE. That defeats the DISCONNECTING-state guard in nvmet_tcp_schedule_release_queue() and allows a later socket state change to re-enter teardown and issue a second kref_put() on an already released queue. The ICResp send failure path has the same problem. If teardown has already moved the queue to DISCONNECTING, a send error can still overwrite the state with NVMET_TCP_Q_FAILED, again reopening the window for a second teardown path to drop the queue reference. Fix this by serializing both post-send state transitions with state_lock and bailing out if teardown has already started. Use -ESHUTDOWN as an internal sentinel for that bail-out path rather than propagating it as a transport error like -ECONNRESET. Keep nvmet_tcp_socket_error() setting rcv_state to NVMET_TCP_RECV_ERR before honoring that sentinel so receive-side parsing stays quiesced until the existing release path completes. Fixes: c46a6465bac2 ("nvmet-tcp: add NVMe over TCP target driver") Cc: stable@vger.kernel.org Reported-by: Shivam Kumar Tested-by: Shivam Kumar Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/tcp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 5c8d17bcc49bd..3d8810b42e9dc 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -398,6 +398,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) { + /* + * Keep rcv_state at RECV_ERR even for the internal -ESHUTDOWN path. 
+ * nvmet_tcp_handle_icreq() can return -ESHUTDOWN after the ICReq has + * already been consumed and queue teardown has started. + * + * If nvmet_tcp_data_ready() or nvmet_tcp_write_space() queues + * nvmet_tcp_io_work() again before nvmet_tcp_release_queue_work() + * cancels it, the queue must not keep that old receive state. + * Otherwise the next nvmet_tcp_io_work() run can reach + * nvmet_tcp_done_recv_pdu() and try to handle the same ICReq again. + * + * That is why queue->rcv_state needs to be updated before we return. + */ queue->rcv_state = NVMET_TCP_RECV_ERR; if (queue->nvme_sq.ctrl) nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); @@ -923,11 +936,24 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) { + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_DISCONNECTING) { + spin_unlock_bh(&queue->state_lock); + return -ESHUTDOWN; + } queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); return ret; /* queue removal will cleanup */ } + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_DISCONNECTING) { + spin_unlock_bh(&queue->state_lock); + /* Tell nvmet_tcp_socket_error() teardown is in progress. */ + return -ESHUTDOWN; + } queue->state = NVMET_TCP_Q_LIVE; + spin_unlock_bh(&queue->state_lock); nvmet_prepare_receive_pdu(queue); return 0; } From 781f47d641432c26c19625b2cdd7f40825097592 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 8 Apr 2026 17:56:47 -0700 Subject: [PATCH 450/554] nvmet: avoid recursive nvmet-wq flush in nvmet_ctrl_free commit aade8abd8b868b6ffa9697aadaea28ec7f65bee6 upstream. nvmet_tcp_release_queue_work() runs on nvmet-wq and can drop the final controller reference through nvmet_cq_put(). If that triggers nvmet_ctrl_free(), the teardown path flushes ctrl->async_event_work on the same nvmet-wq. 
Call chain: nvmet_tcp_schedule_release_queue() kref_put(&queue->kref, nvmet_tcp_release_queue) nvmet_tcp_release_queue() queue_work(nvmet_wq, &queue->release_work) <--- nvmet_wq process_one_work() nvmet_tcp_release_queue_work() nvmet_cq_put(&queue->nvme_cq) nvmet_cq_destroy() nvmet_ctrl_put(cq->ctrl) nvmet_ctrl_free() flush_work(&ctrl->async_event_work) <--- nvmet_wq Previously Scheduled by :- nvmet_add_async_event queue_work(nvmet_wq, &ctrl->async_event_work); This trips lockdep with a possible recursive locking warning. [ 5223.015876] run blktests nvme/003 at 2026-04-07 20:53:55 [ 5223.061801] loop0: detected capacity change from 0 to 2097152 [ 5223.072206] nvmet: adding nsid 1 to subsystem blktests-subsystem-1 [ 5223.088368] nvmet_tcp: enabling port 0 (127.0.0.1:4420) [ 5223.126086] nvmet: Created discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [ 5223.128453] nvme nvme1: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 127.0.0.1:4420, hostnqn: nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349 [ 5233.199447] nvme nvme1: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" [ 5233.227718] ============================================ [ 5233.231283] WARNING: possible recursive locking detected [ 5233.234696] 7.0.0-rc3nvme+ #20 Tainted: G O N [ 5233.238434] -------------------------------------------- [ 5233.241852] kworker/u192:6/2413 is trying to acquire lock: [ 5233.245429] ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90 [ 5233.251438] but task is already holding lock: [ 5233.255254] ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x5cc/0x6e0 [ 5233.261125] other info that might help us debug this: [ 5233.265333] Possible unsafe locking scenario: [ 5233.269217] CPU0 [ 5233.270795] ---- [ 5233.272436] lock((wq_completion)nvmet-wq); [ 5233.275241] 
lock((wq_completion)nvmet-wq); [ 5233.278020] *** DEADLOCK *** [ 5233.281793] May be due to missing lock nesting notation [ 5233.286195] 3 locks held by kworker/u192:6/2413: [ 5233.289192] #0: ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x5cc/0x6e0 [ 5233.294569] #1: ffffc9000e2a7e40 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x1c5/0x6e0 [ 5233.300128] #2: ffffffff82d7dc40 (rcu_read_lock){....}-{1:3}, at: __flush_work+0x62/0x530 [ 5233.304290] stack backtrace: [ 5233.306520] CPU: 4 UID: 0 PID: 2413 Comm: kworker/u192:6 Tainted: G O N 7.0.0-rc3nvme+ #20 PREEMPT(full) [ 5233.306524] Tainted: [O]=OOT_MODULE, [N]=TEST [ 5233.306525] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 5233.306527] Workqueue: nvmet-wq nvmet_tcp_release_queue_work [nvmet_tcp] [ 5233.306532] Call Trace: [ 5233.306534] [ 5233.306536] dump_stack_lvl+0x73/0xb0 [ 5233.306552] print_deadlock_bug+0x225/0x2f0 [ 5233.306556] __lock_acquire+0x13f0/0x2290 [ 5233.306563] lock_acquire+0xd0/0x300 [ 5233.306565] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306571] ? __flush_work+0x20b/0x530 [ 5233.306573] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306577] touch_wq_lockdep_map+0x3b/0x90 [ 5233.306580] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306583] ? __flush_work+0x20b/0x530 [ 5233.306585] __flush_work+0x268/0x530 [ 5233.306588] ? __pfx_wq_barrier_func+0x10/0x10 [ 5233.306594] ? xen_error_entry+0x30/0x60 [ 5233.306600] nvmet_ctrl_free+0x140/0x310 [nvmet] [ 5233.306617] nvmet_cq_put+0x74/0x90 [nvmet] [ 5233.306629] nvmet_tcp_release_queue_work+0x19f/0x360 [nvmet_tcp] [ 5233.306634] process_one_work+0x206/0x6e0 [ 5233.306640] worker_thread+0x184/0x320 [ 5233.306643] ? __pfx_worker_thread+0x10/0x10 [ 5233.306646] kthread+0xf1/0x130 [ 5233.306648] ? __pfx_kthread+0x10/0x10 [ 5233.306651] ret_from_fork+0x355/0x450 [ 5233.306653] ? 
__pfx_kthread+0x10/0x10 [ 5233.306656] ret_from_fork_asm+0x1a/0x30 [ 5233.306664] There is also no need to flush async_event_work from controller teardown. The admin queue teardown already fails outstanding AER requests before the final controller put :- nvmet_sq_destroy(admin sq) nvmet_async_events_failall(ctrl) The controller has already been removed from the subsystem list before nvmet_ctrl_free() quiesces outstanding work. Replace flush_work() with cancel_work_sync() so a pending async_event_work item is canceled and a running instance is waited on without recursing into the same workqueue. Fixes: 06406d81a2d7 ("nvmet: cancel fatal error and flush async work before free controller") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1c5b6bab47791..1567b35f96e37 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1746,7 +1746,7 @@ static void nvmet_ctrl_free(struct kref *ref) nvmet_stop_keep_alive_timer(ctrl); - flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); nvmet_destroy_auth(ctrl); From 3df75fff46b1517eb479d8e6b8e3500763715dd0 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 1 May 2026 01:38:37 +0200 Subject: [PATCH 451/554] openvswitch: vport: fix self-deadlock on release of tunnel ports commit aa69918bd418e700309fdd08509dba324fb24296 upstream. vports are used concurrently and protected by RCU, so netdev_put() must happen after the RCU grace period. So, either in an RCU call or after the synchronize_net(). The rtnl_delete_link() must happen under RTNL and so can't be executed in RCU context. 
Calling synchronize_net() while holding RTNL is not a good idea for performance and system stability under load in general, so calling netdev_put() in RCU call is the right solution here. However, when the device is deleted, rtnl_unlock() will call netdev_run_todo() and block until all the references are gone. In the current code this means that we never reach the call_rcu() and the vport is never freed and the reference is never released, causing a self-deadlock on device removal. Fix that by moving the rcu_call() before the rtnl_unlock(), so the scheduled RCU callback will be executed when synchronize_net() is called from the rtnl_unlock()->netdev_run_todo() while the RTNL itself is already released. Fixes: 6931d21f87bc ("openvswitch: defer tunnel netdev_put to RCU release") Cc: stable@vger.kernel.org Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/20260430233848.440994-2-i.maximets@ovn.org Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/vport-netdev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 12055af832dc0..a1df551e915bc 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -196,9 +196,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - rtnl_unlock(); + /* We can't put the device reference yet, since it can still be in + * use, but rtnl_unlock()->netdev_run_todo() will block until all + * the references are released, so the RCU call must be before it. 
+ */ call_rcu(&vport->rcu, vport_netdev_free); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); From 51a7dd9cbae9210335ce398642ecaaa52c939eb5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Apr 2026 13:13:31 +0200 Subject: [PATCH 452/554] pmdomain: core: Fix detach procedure for virtual devices in genpd commit 26735dfdd8930d9ef1fa92e590a9bf77726efdf6 upstream. If a device is attached to a PM domain through genpd_dev_pm_attach_by_id(), genpd calls pm_runtime_enable() for the corresponding virtual device that it registers. While this avoids boilerplate code in drivers, there is no corresponding call to pm_runtime_disable() in genpd_dev_pm_detach(). This means these virtual devices are typically detached from its genpd, while runtime PM remains enabled for them, which is not how things are designed to work. In worst cases it may lead to critical errors, like a NULL pointer dereference bug in genpd_runtime_suspend(), which was recently reported. For another case, we may end up keeping an unnecessary vote for a performance state for the device. To fix these problems, let's add this missing call to pm_runtime_disable() in genpd_dev_pm_detach(). 
Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWapT40hV3c+CSBqFOW05aWcV1a6v_NiJYgoYi0i9_PDQ@mail.gmail.com/ Fixes: 3c095f32a92b ("PM / Domains: Add support for multi PM domains per device to genpd") Cc: stable@vger.kernel.org Tested-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/pmdomain/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 61c2277c9ce39..5fd08a5118cb8 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3074,6 +3074,7 @@ static const struct bus_type genpd_bus_type = { static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + bool is_virt_dev; unsigned int i; int ret = 0; @@ -3083,6 +3084,13 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); + /* Check if the device was created by genpd at attach. */ + is_virt_dev = dev->bus == &genpd_bus_type; + + /* Disable runtime PM if we enabled it at attach. */ + if (is_virt_dev) + pm_runtime_disable(dev); + /* Drop the default performance state */ if (dev_gpd_data(dev)->default_pstate) { dev_pm_genpd_set_performance_state(dev, 0); @@ -3108,7 +3116,7 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) genpd_queue_power_off_work(pd); /* Unregister the device if it was created by genpd. */ - if (dev->bus == &genpd_bus_type) + if (is_virt_dev) device_unregister(dev); } From ac4bf66686bbbd14f74f8c791e5849995fccfa56 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 2 May 2026 15:19:45 +0100 Subject: [PATCH 453/554] psp: strip variable-length PSP header in psp_dev_rcv() commit 30cb24f97d44f6b81c14b85c5323de62eef1fb7f upstream. psp_dev_rcv() unconditionally removes a fixed PSP_ENCAP_HLEN, even when psph->hdrlen indicates that the PSP header carries optional fields. 
A frame whose PSP header advertises a non-zero VC or any extension would therefore be silently mis-decapsulated: option bytes would spill into the inner packet head and downstream parsing would fail on a corrupted skb. Compute the full PSP header length from psph->hdrlen, pull the optional bytes into the linear region, and strip the whole header when decapsulating. Optional fields (VC, ...) are still ignored, just discarded with the rest of the header instead of leaking. crypt_offset and the VIRT flag are intentionally not validated here - callers know their device's PSP implementation and can decide. Both in-tree callers gate on hardware-validated PSP, so this is a correctness fix rather than a reachable corruption path under current configurations. Fixes: 0eddb8023cee ("psp: provide decapsulation and receive helper for drivers") Reviewed-by: Willem de Bruijn Reviewed-by: Daniel Zahka Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260502141945.14484-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/psp/psp_main.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 53c0313633689..e35c31977479c 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -262,15 +262,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate); /* Receive handler for PSP packets. * - * Presently it accepts only already-authenticated packets and does not - * support optional fields, such as virtualization cookies. The caller should - * ensure that skb->data is pointing to the mac header, and that skb->mac_len - * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE - * is not supported). + * Accepts only already-authenticated packets. The full PSP header is + * stripped according to psph->hdrlen; any optional fields it advertises + * (virtualization cookies, etc.) 
are ignored and discarded along with the + * rest of the header. The caller should ensure that skb->data is pointing + * to the mac header, and that skb->mac_len is set. This function does not + * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported). */ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) { - int l2_hlen = 0, l3_hlen, encap; + int l2_hlen = 0, l3_hlen, encap, psp_hlen; struct psp_skb_ext *pse; struct psphdr *psph; struct ethhdr *eth; @@ -311,18 +312,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT))) return -EINVAL; - pse = skb_ext_add(skb, SKB_EXT_PSP); - if (!pse) + psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + + sizeof(struct udphdr)); + + /* Strip the full PSP header per psph->hdrlen; VC/options are pulled + * into the linear region only so they can be discarded with the + * rest of the header. + */ + psp_hlen = (psph->hdrlen + 1) * 8; + + if (unlikely(psp_hlen < sizeof(struct psphdr))) + return -EINVAL; + + if (psp_hlen > sizeof(struct psphdr) && + !pskb_may_pull(skb, l2_hlen + l3_hlen + + sizeof(struct udphdr) + psp_hlen)) return -EINVAL; psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + sizeof(struct udphdr)); + + pse = skb_ext_add(skb, SKB_EXT_PSP); + if (!pse) + return -EINVAL; + pse->spi = psph->spi; pse->dev_id = dev_id; pse->generation = generation; pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl); - encap = PSP_ENCAP_HLEN; + encap = sizeof(struct udphdr) + psp_hlen; encap += strip_icv ? 
PSP_TRL_SIZE : 0; if (proto == htons(ETH_P_IP)) { @@ -339,8 +358,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap); } - memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen); - skb_pull(skb, PSP_ENCAP_HLEN); + memmove(skb->data + sizeof(struct udphdr) + psp_hlen, + skb->data, l2_hlen + l3_hlen); + skb_pull(skb, sizeof(struct udphdr) + psp_hlen); if (strip_icv) pskb_trim(skb, skb->len - PSP_TRL_SIZE); From 1912f78798505dc9c637081bbddfbf1c22494c49 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:48 -0300 Subject: [PATCH 454/554] RDMA/hns: Fix unlocked call to hns_roce_qp_remove() commit 0c99acbc8b6c6dd526ae475a48ee1897b61072fb upstream. Sashiko points out that hns_roce_qp_remove() requires the caller to hold locks. The error flow in hns_roce_create_qp_common() doesn't hold those locks for the error unwind so it risks corrupting memory. Grab the same locks the other two callers use. 
Cc: stable@vger.kernel.org Fixes: e088a685eae9 ("RDMA/hns: Support rq record doorbell for the user space") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=9 Link: https://patch.msgid.link/r/15-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Junxian Huang Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hns/hns_roce_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bdd879ac12dda..5c475ac19e3ca 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1150,6 +1150,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; + unsigned long flags; int ret; mutex_init(&hr_qp->mutex); @@ -1236,7 +1237,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, return 0; err_flow_ctrl: + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_unlock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); err_store: free_qpc(hr_dev, hr_qp); err_qpc: From bd62c0f61bc722a097417401030c596cea8e21aa Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Mon, 16 Mar 2026 16:16:11 +0100 Subject: [PATCH 455/554] riscv: kvm: fix vector context allocation leak commit b7c958d7c1eb1cb9b2be7b5ee4129fcd66cec978 upstream. When the second kzalloc (host_context.vector.datap) fails in kvm_riscv_vcpu_alloc_vector_context, the first allocation (guest_context.vector.datap) is leaked. 
Free it before returning. Fixes: 0f4b82579716 ("riscv: KVM: Add vector lazy save/restore support") Cc: stable@vger.kernel.org Signed-off-by: Osama Abdelkader Reviewed-by: Andy Chiu Link: https://lore.kernel.org/r/20260316151612.13305-1-osama.abdelkader@gmail.com Signed-off-by: Anup Patel Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kvm/vcpu_vector.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index 05f3cc2d8e311..5b6ad82d47beb 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -80,8 +80,11 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu) return -ENOMEM; vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); - if (!vcpu->arch.host_context.vector.datap) + if (!vcpu->arch.host_context.vector.datap) { + kfree(vcpu->arch.guest_context.vector.datap); + vcpu->arch.guest_context.vector.datap = NULL; return -ENOMEM; + } return 0; } From e3eee7ea5ec57c1910afc45dbe3301b21dee1ebb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Apr 2026 14:33:43 +0200 Subject: [PATCH 456/554] s390/debug: Reject zero-length input in debug_input_flush_fn() commit e14622a7584f9608927c59a7d6ae4a0999dc545e upstream. debug_input_flush_fn() always copies one byte from the userspace buffer with copy_from_user() regardless of the supplied write length. A zero-length write therefore reads one byte beyond the caller's buffer. If the stale byte happens to be '-' or a digit the debug log is silently flushed. With an unmapped buffer the call returns -EFAULT. Reject zero-length writes before copying from userspace. 
Cc: stable@vger.kernel.org # v5.10+ Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6a26f202441d3..c8f05456f32cd 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1586,6 +1586,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, char input_buf[1]; int rc = user_len; + if (!user_len) { + rc = -EINVAL; + goto out; + } + if (user_len > 0x10000) user_len = 0x10000; if (*offset != 0) { From cf90a96b7a2d769c2ec88048f94b33539a362bf5 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Fri, 17 Apr 2026 15:35:30 +0800 Subject: [PATCH 457/554] s390/debug: Reject zero-length input before trimming a newline commit c366a7b5ed7564e41345c380285bd3f6cb98971b upstream. debug_get_user_string() duplicates the userspace buffer with memdup_user_nul() and then unconditionally looks at buffer[user_len - 1] to strip a trailing newline. A zero-length write reaches this helper unchanged, so the newline trim reads before the start of the allocated buffer. Reject empty writes before accessing the last input byte. 
Fixes: 66a464dbc8e0 ("[PATCH] s390: debug feature changes") Cc: stable@vger.kernel.org Signed-off-by: Pengpeng Hou Reviewed-by: Benjamin Block Reviewed-by: Vasily Gorbik Tested-by: Vasily Gorbik Link: https://lore.kernel.org/r/20260417073530.96002-1-pengpeng@iscas.ac.cn Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c8f05456f32cd..112cf7b40dace 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1416,6 +1416,9 @@ static inline char *debug_get_user_string(const char __user *user_buf, { char *buffer; + if (!user_len) + return ERR_PTR(-EINVAL); + buffer = memdup_user_nul(user_buf, user_len); if (IS_ERR(buffer)) return buffer; From 45dcc815fc5539e88154315f36cbcb11d3a52fc2 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Tue, 14 Apr 2026 16:38:11 +0530 Subject: [PATCH 458/554] scsi: mpt3sas: Limit NVMe request size to 2 MiB commit 04631f55afc543d5431a2bdee7f6cc0f2c0debe7 upstream. The HBA firmware reports NVMe MDTS values based on the underlying drive capability. However, because the driver allocates a fixed 4K buffer for the PRP list, accommodating at most 512 entries, the driver supports a maximum I/O transfer size of 2 MiB. Limit max_hw_sectors to the smaller of the reported MDTS and the 2 MiB driver limit to prevent issuing oversized I/O that may lead to a kernel oops. Cc: stable@vger.kernel.org Fixes: 9b8b84879d4a ("block: Increase BLK_DEF_MAX_SECTORS_CAP") Reported-by: Mira Limbeck Closes: https://lore.kernel.org/r/291f78bf-4b4a-40dd-867d-053b36c564b3@proxmox.com Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9b8b84879d4a Suggested-by: Keith Busch Signed-off-by: Ranjan Kumar Tested-by: Mira Limbeck Link: https://patch.msgid.link/20260414110811.85156-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 7092d0debef39..43ae997242182 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2681,8 +2681,20 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) pcie_device->enclosure_level, pcie_device->connector_name); + /* + * The HBA firmware passes the NVMe drive's MDTS + * (Maximum Data Transfer Size) up to the driver. However, + * the driver hardcodes a 4K buffer size for the PRP list, + * accommodating at most 512 entries. This strictly limits + * the maximum supported NVMe I/O transfer to 2 MiB. + * + * Cap max_hw_sectors to the smaller of the drive's reported + * MDTS or the 2 MiB driver limit to prevent kernel oopses. + */ + lim->max_hw_sectors = SZ_2M >> SECTOR_SHIFT; if (pcie_device->nvme_mdts) - lim->max_hw_sectors = pcie_device->nvme_mdts / 512; + lim->max_hw_sectors = min(lim->max_hw_sectors, + pcie_device->nvme_mdts >> SECTOR_SHIFT); pcie_device_put(pcie_device); spin_unlock_irqrestore(&ioc->pcie_device_lock, flags); From 512d33bc8ea4ea5c19728ee118715f4b1f4d1926 Mon Sep 17 00:00:00 2001 From: Zisen Ye Date: Wed, 6 May 2026 11:49:08 +0800 Subject: [PATCH 459/554] smb/client: fix out-of-bounds read in smb2_compound_op() commit 8d09328dfda089675e4c049f3f256064a1d1996b upstream. If a server sends a truncated response but a large OutputBufferLength, and terminates the EA list early, check_wsl_eas() returns success without validating that the entire OutputBufferLength fits within iov_len. Then smb2_compound_op() does: memcpy(idata->wsl.eas, data[0], size[0]); Where size[0] is OutputBufferLength. If iov_len is smaller than size[0], memcpy can read beyond the end of the rsp_iov allocation and leak adjacent kernel heap memory. 
Link: https://lore.kernel.org/linux-cifs/d998240c-aca9-420d-9dbd-f5ba24af19e0@chenxiaosong.com/ Fixes: ea41367b2a60 ("smb: client: introduce SMB2_OP_QUERY_WSL_EA") Cc: stable@vger.kernel.org Signed-off-by: Zisen Ye Reviewed-by: ChenXiaoSong Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 17052b988951f..e13bc8a97c449 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -108,7 +108,7 @@ static int check_wsl_eas(struct kvec *rsp_iov) u32 outlen, next; u16 vlen; u8 nlen; - u8 *end; + u8 *ea_end, *iov_end; outlen = le32_to_cpu(rsp->OutputBufferLength); if (outlen < SMB2_WSL_MIN_QUERY_EA_RESP_SIZE || @@ -117,15 +117,19 @@ static int check_wsl_eas(struct kvec *rsp_iov) ea = (void *)((u8 *)rsp_iov->iov_base + le16_to_cpu(rsp->OutputBufferOffset)); - end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + ea_end = (u8 *)ea + outlen; + iov_end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + if (ea_end > iov_end) + return -EINVAL; + for (;;) { - if ((u8 *)ea > end - sizeof(*ea)) + if ((u8 *)ea > ea_end - sizeof(*ea)) return -EINVAL; nlen = ea->ea_name_length; vlen = le16_to_cpu(ea->ea_value_length); if (nlen != SMB2_WSL_XATTR_NAME_LEN || - (u8 *)ea->ea_data + nlen + 1 + vlen > end) + (u8 *)ea->ea_data + nlen + 1 + vlen > ea_end) return -EINVAL; switch (vlen) { From b8c8a704f0bc133deb171f6aeb6f3a684203e212 Mon Sep 17 00:00:00 2001 From: Zisen Ye Date: Sat, 2 May 2026 18:48:36 +0800 Subject: [PATCH 460/554] smb/client: fix out-of-bounds read in symlink_data() commit d62b8d236fab503c6fec1d3e9a38bea71feaca20 upstream. Since smb2_check_message() returns success without length validation for the symlink error response, in symlink_data() it is possible for iov->iov_len to be smaller than sizeof(struct smb2_err_rsp). 
If the buffer only contains the base SMB2 header (64 bytes), accessing err->ErrorContextCount (at offset 66) or err->ByteCount later in symlink_data() will cause an out-of-bounds read. Link: https://lore.kernel.org/linux-cifs/297d8d9b-adf7-42fd-a1c2-5b1f230032bc@chenxiaosong.com/ Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Cc: Stable@vger.kernel.org Signed-off-by: Zisen Ye Reviewed-by: ChenXiaoSong Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index a6fc8ff398ebf..0b37bc2a7f1c3 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -240,7 +240,8 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) if (len != calc_len) { /* create failed on symlink */ if (command == SMB2_CREATE_HE && - shdr->Status == STATUS_STOPPED_ON_SYMLINK) + shdr->Status == STATUS_STOPPED_ON_SYMLINK && + len > calc_len) return 0; /* Windows 7 server returns 24 bytes more */ if (calc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE) From be1ef9512a3f5a755895c24f31b334342f4aa15b Mon Sep 17 00:00:00 2001 From: Bjoern Doebel Date: Thu, 30 Apr 2026 08:57:17 +0000 Subject: [PATCH 461/554] smb: client: use kzalloc to zero-initialize security descriptor buffer commit 5e489c6c47a2ac15edbaca153b9348e42c1eacab upstream. Commit 62e7dd0a39c2d ("smb: common: change the data type of num_aces to le16") split struct smb_acl's __le32 num_aces field into __le16 num_aces and __le16 reserved. The reserved field corresponds to Sbz2 in the MS-DTYP ACL wire format, which must be zero [1]. When building an ACL descriptor in build_sec_desc(), we are using a kmalloc()'ed descriptor buffer and writing the fields explicitly using le16() writes now. This never writes to the 2 byte reserved field, leaving it as uninitialized heap data. 
When the reserved field happens to contain non-zero slab garbage, Samba rejects the security descriptor with "ndr_pull_security_descriptor failed: Range Error", causing chmod to fail with EINVAL. Change kmalloc() to kzalloc() to ensure the entire buffer is zero-initialized. Fixes: 62e7dd0a39c2d ("smb: common: change the data type of num_aces to le16") Cc: stable@vger.kernel.org Signed-off-by: Bjoern Doebel Assisted-by: Kiro:claude-opus-4.6 [1] https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/20233ed8-a6c6-4097-aafa-dd545ed24428 Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsacl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 1d9e2e742ed71..5b43d5ddfd94a 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1685,7 +1685,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, * descriptor parameters, and security descriptor itself */ nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN); - pnntsd = kmalloc(nsecdesclen, GFP_KERNEL); + pnntsd = kzalloc(nsecdesclen, GFP_KERNEL); if (!pnntsd) { kfree(pntsd); cifs_put_tlink(tlink); From c688f3ed73d31943334ad2139cb02ec49664322a Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 20 Apr 2026 10:47:47 -0400 Subject: [PATCH 462/554] smb: client: validate dacloffset before building DACL pointers commit f98b48151cc502ada59d9778f0112d21f2586ca3 upstream. parse_sec_desc(), build_sec_desc(), and the chown path in id_mode_to_cifs_acl() all add the server-supplied dacloffset to pntsd before proving a DACL header fits inside the returned security descriptor. On 32-bit builds a malicious server can return dacloffset near U32_MAX, wrap the derived DACL pointer below end_of_acl, and then slip past the later pointer-based bounds checks. 
build_sec_desc() and id_mode_to_cifs_acl() can then dereference DACL fields from the wrapped pointer in the chmod/chown rewrite paths. Validate dacloffset numerically before building any DACL pointer and reuse the same helper at the three DACL entry points. Fixes: bc3e9dd9d104 ("cifs: Change SIDs in ACEs while transferring file ownership.") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Michael Bommarito Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsacl.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 5b43d5ddfd94a..e294883558709 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1215,6 +1215,17 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return 0; } +static bool dacl_offset_valid(unsigned int acl_len, __u32 dacloffset) +{ + if (acl_len < sizeof(struct smb_acl)) + return false; + + if (dacloffset < sizeof(struct smb_ntsd)) + return false; + + return dacloffset <= acl_len - sizeof(struct smb_acl); +} + /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, @@ -1235,7 +1246,6 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); - dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n", pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), le32_to_cpu(pntsd->gsidoffset), @@ -1266,11 +1276,18 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, return rc; } - if (dacloffset) + if (dacloffset) { + if (!dacl_offset_valid(acl_len, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + + dacl_ptr = (struct smb_acl *)((char *)pntsd 
+ dacloffset); parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr, get_mode_from_special_sid); - else + } else { cifs_dbg(FYI, "no ACL\n"); /* BB grant all or default perms? */ + } return rc; } @@ -1293,6 +1310,11 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) { cifs_dbg(VFS, "Server returned illegal ACL size\n"); @@ -1669,6 +1691,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + rc = -EINVAL; + goto id_mode_to_cifs_acl_exit; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += @@ -1705,6 +1733,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } +id_mode_to_cifs_acl_exit: cifs_put_tlink(tlink); kfree(pnntsd); From d6f4e217d663ede5becc2fd6cb612c749677387b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Apr 2026 14:25:40 +0200 Subject: [PATCH 463/554] KVM: x86: check for nEPT/nNPT in slow flush hypercalls commit 464af6fc2b1dcc74005b7f58ee3812b17777efee upstream. Checking is_guest_mode(vcpu) is incorrect, because translate_nested_gpa() is only valid if an L2 guest is running *with nested EPT/NPT enabled*. Instead use the same condition as translate_nested_gpa() itself. 
Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Fixes: aee738236dca ("KVM: x86: Prepare kvm_hv_flush_tlb() to handle L2's GPAs", 2022-11-18) Link: https://patch.msgid.link/20260503200905.106077-1-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 38595ecb990d4..f279561154bc9 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2039,7 +2039,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * flush). Translate the address here so the memory can be uniformly * read with kvm_read_guest(). */ - if (!hc->fast && is_guest_mode(vcpu)) { + if (!hc->fast && mmu_is_nested(vcpu)) { hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL); if (unlikely(hc->ingpa == INVALID_GPA)) return HV_STATUS_INVALID_HYPERCALL_INPUT; From bb1703949dcaa9a49c338dee075f659f4634214d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 3 May 2026 19:19:32 +0200 Subject: [PATCH 464/554] KVM: x86: Do IRR scan in __kvm_apic_update_irr even if PIR is empty commit 33fd0ccd2590b470b65adcca288615ad3b5e3e06 upstream. Fall back to apic_find_highest_vector() when PID.ON is set but PIR turns out to be empty, to correctly report the highest pending interrupt from the existing IRR. 
In a nested VM stress test, the following WARNING fires in vmx_check_nested_events() when kvm_cpu_has_interrupt() reports a pending interrupt but the subsequent kvm_apic_has_interrupt() (which invokes vmx_sync_pir_to_irr() again) returns -1: WARNING: CPU: 99 PID: 57767 at arch/x86/kvm/vmx/nested.c:4449 vmx_check_nested_events+0x6bf/0x6e0 [kvm_intel] Call Trace: kvm_check_and_inject_events vcpu_enter_guest.constprop.0 vcpu_run kvm_arch_vcpu_ioctl_run kvm_vcpu_ioctl __x64_sys_ioctl do_syscall_64 entry_SYSCALL_64_after_hwframe The root cause is a race between vmx_sync_pir_to_irr() on the target vCPU and __vmx_deliver_posted_interrupt() on a sender vCPU. The sender performs two individually-atomic operations that are not a single transaction: 1. pi_test_and_set_pir(vector) -- sets the PIR bit 2. pi_test_and_set_on() -- sets PID.ON The following interleaving triggers the bug: Sender vCPU (IPI): Target vCPU (1st sync_pir_to_irr): B1: set PIR[vector] A1: pi_clear_on() A2: pi_harvest_pir() -> sees B1 bit A3: xchg() -> consumes bit, PIR=0 (1st sync returns correct max_irr) B2: set PID.ON = 1 Target vCPU (2nd sync_pir_to_irr): C1: pi_test_on() -> TRUE (from B2) C2: pi_clear_on() -> ON=0 C3: pi_harvest_pir() -> PIR empty C4: *max_irr = -1, early return IRR NOT SCANNED The interrupt is not lost (it resides in the IRR from the first sync and is recovered on the next vcpu_enter_guest() iteration), but the incorrect max_irr causes a spurious WARNING and a wasted L2 VM-Enter/VM-Exit cycle. 
Fixes: b41f8638b9d3 ("KVM: VMX: Isolate pure loads from atomic XCHG when processing PIR") Reported-by: Farrah Chen Analyzed-by: Chenyi Qiang Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/kvm/20260428070349.1633238-1-chenyi.qiang@intel.com/T/ Link: https://patch.msgid.link/20260503201703.108231-2-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a9845a1a9cd5d..0e842dad4194a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -669,12 +669,14 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) u32 irr_val, prev_irr_val; int max_updated_irr; + if (!pi_harvest_pir(pir, pir_vals)) { + *max_irr = apic_find_highest_vector(regs + APIC_IRR); + return false; + } + max_updated_irr = -1; *max_irr = -1; - if (!pi_harvest_pir(pir, pir_vals)) - return false; - for (i = vec = 0; i <= 7; i++, vec += 32) { u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10); From cb825c22c00268823801e21ea8d7dd1e4d72adfd Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 19 Apr 2026 09:10:02 -0700 Subject: [PATCH 465/554] mm/damon/stat: detect and use fresh enabled value commit f98590bc08d4aea435e1c2213e38bae0d9e9a7bb upstream. DAMON_STAT updates 'enabled' parameter value, which represents the running status of its kdamond, when the user explicitly requests start/stop of the kdamond. The kdamond can, however, be stopped even if the user explicitly requested the stop, if ctx->regions_score_histogram allocation failure at beginning of the execution of the kdamond. Hence, if the kdamond is stopped by the allocation failure, the value of the parameter can be stale. Users could show the stale value and be confused. The problem will only rarely happen in real and common setups because the allocation is arguably too small to fail. 
Also, unlike the similar bugs that are now fixed in DAMON_RECLAIM and DAMON_LRU_SORT, kdamond can be restarted in this case, because DAMON_STAT force-updates the enabled parameter value for user inputs. The bug is a bug, though. The issue stems from the fact that there are multiple events that can change the status, and following all the events is challenging. Dynamically detect and use the fresh status for the parameters when those are requested. The issue was discovered [1] by Sashiko. Link: https://lore.kernel.org/20260419161003.79176-4-sj@kernel.org Link: https://lore.kernel.org/20260416040602.88665-1-sj@kernel.org [1] Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module") Signed-off-by: SeongJae Park Cc: Liew Rui Yan Cc: # 6.17.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/stat.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/mm/damon/stat.c b/mm/damon/stat.c index eb728a89924e1..57972096d4ebd 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -19,14 +19,17 @@ static int damon_stat_enabled_store( const char *val, const struct kernel_param *kp); +static int damon_stat_enabled_load(char *buffer, + const struct kernel_param *kp); + static const struct kernel_param_ops enabled_param_ops = { .set = damon_stat_enabled_store, - .get = param_get_bool, + .get = damon_stat_enabled_load, }; static bool enabled __read_mostly = IS_ENABLED( CONFIG_DAMON_STAT_ENABLED_DEFAULT); -module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +module_param_cb(enabled, &enabled_param_ops, NULL, 0600); MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT"); static unsigned long estimated_memory_bandwidth __read_mostly; @@ -265,17 +268,23 @@ static void damon_stat_stop(void) damon_stat_context = NULL; } +static bool damon_stat_enabled(void) +{ + if (!damon_stat_context) + return false; + return damon_is_running(damon_stat_context); +} + static int damon_stat_enabled_store( const char
*val, const struct kernel_param *kp) { - bool is_enabled = enabled; int err; err = kstrtobool(val, &enabled); if (err) return err; - if (is_enabled == enabled) + if (damon_stat_enabled() == enabled) return 0; if (!damon_initialized()) @@ -285,16 +294,17 @@ static int damon_stat_enabled_store( */ return 0; - if (enabled) { - err = damon_stat_start(); - if (err) - enabled = false; - return err; - } + if (enabled) + return damon_stat_start(); damon_stat_stop(); return 0; } +static int damon_stat_enabled_load(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%c\n", damon_stat_enabled() ? 'Y' : 'N'); +} + static int __init damon_stat_init(void) { int err = 0; From eafd6f5372d29b0dd213799b92c2c9c7ad31d7da Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 23 Apr 2026 08:02:51 -0700 Subject: [PATCH 466/554] mm/damon/sysfs-schemes: protect memcg_path kfree() with damon_sysfs_lock commit 1e68eb96e8beb1abefd12dd22c5637795d8a877e upstream. Patch series "mm/damon/sysfs-schemes: fix use-after-free for [memcg_]path". Reads of 'memcg_path' and 'path' files in DAMON sysfs interface could race with their writes, resulting in use-after-free. Fix those. This patch (of 2): damon_sysfs_scheme_filter->memcg_path can be read and written by users, via DAMON sysfs memcg_path file. It can also be indirectly read, for the parameters {on,off}line committing to DAMON. The reads for parameters committing are protected by damon_sysfs_lock to avoid the sysfs files being destroyed while any of the parameters are being read. But the user-driven direct reads and writes are not protected by any lock, while the write is deallocating the memcg_path-pointing buffer. As a result, the readers could read the already freed buffer (use-after-free). Note that the user-reads don't race when the same open file is used by the writer, due to kernfs's open file locking. Nonetheless, doing the reads and writes with separate open files would be common.
Fix it by protecting both the user-direct reads and writes with damon_sysfs_lock. Link: https://lore.kernel.org/20260423150253.111520-1-sj@kernel.org Link: https://lore.kernel.org/20260423150253.111520-2-sj@kernel.org Fixes: 4f489fe6afb3 ("mm/damon/sysfs-schemes: free old damon_sysfs_scheme_filter->memcg_path on write") Co-developed-by: Junxi Qian Signed-off-by: Junxi Qian Signed-off-by: SeongJae Park Cc: # 6.16.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs-schemes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 50d000d61c907..ca82dc78f0b98 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -494,9 +494,14 @@ static ssize_t memcg_path_show(struct kobject *kobj, { struct damon_sysfs_scheme_filter *filter = container_of(kobj, struct damon_sysfs_scheme_filter, kobj); + int len; - return sysfs_emit(buf, "%s\n", + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + len = sysfs_emit(buf, "%s\n", filter->memcg_path ? filter->memcg_path : ""); + mutex_unlock(&damon_sysfs_lock); + return len; } static ssize_t memcg_path_store(struct kobject *kobj, @@ -511,8 +516,13 @@ static ssize_t memcg_path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + if (!mutex_trylock(&damon_sysfs_lock)) { + kfree(path); + return -EBUSY; + } kfree(filter->memcg_path); filter->memcg_path = path; + mutex_unlock(&damon_sysfs_lock); return count; } From 185d51ec3a54a1fc368908eafbec4d663eb5fdfb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 15 Apr 2026 17:56:06 +0200 Subject: [PATCH 467/554] PCI: Update saved_config_space upon resource assignment commit 909f7bf9b080c10df3c3b38533906dbf09ff1d8b upstream. 
Bernd reports passthrough failure of a Digital Devices Cine S2 V6 DVB adapter plugged into an ASRock X570S PG Riptide board with BIOS version P5.41 (09/07/2023): ddbridge 0000:05:00.0: detected Digital Devices Cine S2 V6 DVB adapter ddbridge 0000:05:00.0: cannot read registers ddbridge 0000:05:00.0: fail BIOS assigns an incorrect BAR to the DVB adapter which doesn't fit into the upstream bridge window. The kernel corrects the BAR assignment: pci 0000:07:00.0: BAR 0 [mem 0xfffffffffc500000-0xfffffffffc50ffff 64bit]: can't claim; no compatible bridge window pci 0000:07:00.0: BAR 0 [mem 0xfc500000-0xfc50ffff 64bit]: assigned Correction of the BAR assignment happens in an x86-specific fs_initcall, pcibios_assign_resources(), after device enumeration in a subsys_initcall. This order was introduced at the behest of Linus in 2004: https://git.kernel.org/tglx/history/c/a06a30144bbc No other architecture performs such a late BAR correction. Bernd bisected the issue to commit a2f1e22390ac ("PCI/ERR: Ensure error recoverability at all times"), but it only occurs in the absence of commit 4d4c10f763d7 ("PCI: Explicitly put devices into D0 when initializing"). This combination exists in stable kernel v6.12.70, but not in mainline, hence Bernd cannot reproduce the issue with mainline. Since a2f1e22390ac, config space is saved on enumeration, prior to BAR correction. Upon passthrough, the corrected BAR is overwritten with the incorrect saved value by: vfio_pci_core_register_device() vfio_pci_set_power_state() pci_restore_state() But only if the device's current_state is PCI_UNKNOWN, as it was prior to commit 4d4c10f763d7. Since the commit, it is PCI_D0, which changes the behavior of vfio_pci_set_power_state() to no longer restore the state without saving it first. Alexandre is reporting the same issue as Bernd, but in his case, mainline is affected as well. 
The difference is that on Alexandre's system, the host kernel binds a driver to the device which is unbound prior to passthrough, whereas on Bernd's system no driver gets bound by the host kernel. Unbinding sets current_state to PCI_UNKNOWN in pci_device_remove(), so when vfio-pci is subsequently bound to the device, pci_restore_state() is once again called without invoking pci_save_state() first. To robustly fix the issue, always update saved_config_space upon resource assignment. Reported-by: Bernd Schumacher Closes: https://lore.kernel.org/r/acfZrlP0Ua_5D3U4@eldamar.lan/ Reported-by: Alexandre N. Closes: https://lore.kernel.org/r/dd3c3358-de0f-4a56-9c81-04aceaab4058@mailo.com/ Fixes: a2f1e22390ac ("PCI/ERR: Ensure error recoverability at all times") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Bernd Schumacher Tested-by: Alexandre N. Cc: stable@vger.kernel.org # v6.12+ Link: https://patch.msgid.link/febc3f354e0c1f5a9f5b3ee9ffddaa44caccf651.1776268054.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/pci/setup-res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index c3ba4ccecd433..60da129d714f9 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -102,6 +102,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) } pci_write_config_dword(dev, reg, new); + dev->saved_config_space[reg / 4] = new; pci_read_config_dword(dev, reg, &check); if ((new ^ check) & mask) { @@ -112,6 +113,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) if (res->flags & IORESOURCE_MEM_64) { new = region.start >> 16 >> 16; pci_write_config_dword(dev, reg + 4, new); + dev->saved_config_space[(reg + 4) / 4] = new; pci_read_config_dword(dev, reg + 4, &check); if (check != new) { pci_err(dev, "%s: error updating (high %#010x != %#010x)\n", From 70a5d28dd27320bb5a793787b0b7e4625d4228de Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 11 Feb 
2026 20:46:24 +0800 Subject: [PATCH 468/554] PCI/AER: Clear only error bits in PCIe Device Status commit a8aeea1bf3c80cc87983689e0118770e019bd4f3 upstream. Currently, pcie_clear_device_status() clears the entire PCIe Device Status register (PCI_EXP_DEVSTA) by writing back the value read from the register, which affects not only the error status bits but also other writable bits. According to PCIe r7.0, sec 7.5.3.5, this register contains: - RW1C error status bits (CED, NFED, FED, URD at bits 0-3): These are the four error status bits that need to be cleared. - Read-only bits (AUXPD at bit 4, TRPND at bit 5): Writing to these has no effect. - Emergency Power Reduction Detected (bit 6): A RW1C non-error bit introduced in PCIe r5.0 (2019). This is currently the only writable non-error bit in the Device Status register. Unconditionally clearing this bit can interfere with other software components that rely on this power management indication. - Reserved bits (RsvdZ): These bits are required to be written as zero. Writing 1s to them (as the current implementation may do) violates the specification. To prevent unintended side effects, modify pcie_clear_device_status() to only write 1s to the four error status bits (CED, NFED, FED, URD), leaving the Emergency Power Reduction Detected bit and reserved bits unaffected. 
Fixes: ec752f5d54d7 ("PCI/AER: Clear device status bits during ERR_FATAL and ERR_NONFATAL") Suggested-by: Lukas Wunner Signed-off-by: Shuai Xue Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260211124624.49656-1-xueshuai@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e128696d5b761..a62ed5560c68f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2272,10 +2272,9 @@ EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); #ifdef CONFIG_PCIEAER void pcie_clear_device_status(struct pci_dev *dev) { - u16 sta; - - pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); - pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); + pcie_capability_write_word(dev, PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED | + PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD); } #endif From 24582f5044bf9e7deff923efa2bc364eb612268c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 27 Mar 2026 10:56:43 +0100 Subject: [PATCH 469/554] PCI/AER: Stop ruling out unbound devices as error source commit 1ab4a3c805084d752ec571efc78272295a9f2f74 upstream. When searching for the error source, the AER driver rules out devices whose enable_cnt is zero. This was introduced in 2009 by commit 28eb27cf0839 ("PCI AER: support invalid error source IDs") without providing a rationale. Drivers typically call pci_enable_device() on probe, hence the enable_cnt check essentially filters out unbound devices. At the time of the commit, drivers had to opt in to AER by calling pci_enable_pcie_error_reporting() and so any AER-enabled device could be assumed to be bound to a driver. The check thus made sense because it allowed skipping config space accesses to devices which were known not to be the error source. 
But since 2022, AER is universally enabled on all devices when they are enumerated, cf. commit f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"). Errors may very well be reported by unbound devices, e.g. due to link instability. By ruling them out as error source, errors reported by them are neither logged nor cleared. When they do get bound and another error occurs, the earlier error is reported together with the new error, which may confuse users. Stop doing so. Fixes: f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Stefan Roese Cc: stable@vger.kernel.org # v6.0+ Link: https://patch.msgid.link/734338c2e8b669db5a5a3b45d34131b55ffebfca.1774605029.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 29504173425cd..85f1f0cf62d61 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1034,8 +1034,6 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) * 3) There are multiple errors and prior ID comparing fails; * We check AER status registers to find possible reporter. */ - if (atomic_read(&dev->enable_cnt) == 0) - return false; /* Check if AER is enabled */ pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); From 1bf238ead7cc05519759ec80882886be9348656b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 16 Feb 2026 08:46:13 +0100 Subject: [PATCH 470/554] PCI/ASPM: Fix pci_clear_and_set_config_dword() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc33985d26c92a5c908c0185239c59ec35b8637c upstream. 
When aspm_calc_l12_info() programs the L1 PM Substates Control 1 register fields Common_Mode_Restore_Time, LTR_L1.2_THRESHOLD_Value and _Scale, it invokes pci_clear_and_set_config_dword() in an incorrect way: For the bits to clear it selects those corresponding to the field. So far so good. But for the bits to set it passes a full register value. pci_clear_and_set_config_dword() performs a boolean OR operation which sets all bits of that value, not just the ones that were just cleared. Thus, when setting the LTR_L1.2_THRESHOLD_Value and _Scale on the child of an ASPM link, aspm_calc_l12_info() also sets the Common_Mode_Restore_Time. That's a spec violation: PCIe r7.0 sec 7.8.3.3 says this field is RsvdP for Upstream Ports. On Adrià's Pixelbook Eve, Common_Mode_Restore_Time of the Intel 7265 "Stone Peak" wifi card is zero, yet aspm_calc_l12_info() does not preserve the zero bits but instead programs the value calculated for the Root Port into the wifi card. Likewise, when setting the Common_Mode_Restore_Time on the Root Port, aspm_calc_l12_info() also changes the LTR_L1.2_THRESHOLD_Value and _Scale from the initial 163840 nsec to 237568 nsec (due to ORing those fields), only to reduce it afterwards to 106496 nsec. Amend all invocations of pci_clear_and_set_config_dword() to only set bits which are cleared. Finally, when setting the T_POWER_ON_Value and _Scale on the Root Port and the wifi card, aspm_calc_l12_info() fails to preserve bits declared RsvdP and instead overwrites them with zeroes. Replace pci_write_config_dword() with pci_clear_and_set_config_dword() to avoid this. 
Fixes: aeda9adebab8 ("PCI/ASPM: Configure L1 substate settings") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220705#c22 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Adrià Vilanova Martínez Cc: stable@vger.kernel.org # v4.11+ Link: https://patch.msgid.link/5c1752d7512eed0f4ea57b84b12d7ee08ca61fc5.1771226659.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index cedea47a3547f..a1f275259f7dd 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -706,22 +706,29 @@ static void aspm_calc_l12_info(struct pcie_link_state *link, } /* Program T_POWER_ON times in both ports */ - pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2); - pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, + PCI_L1SS_CTL2_T_PWR_ON_VALUE | + PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2); + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL2, + PCI_L1SS_CTL2_T_PWR_ON_VALUE | + PCI_L1SS_CTL2_T_PWR_ON_SCALE, ctl2); /* Program Common_Mode_Restore_Time in upstream device */ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); + PCI_L1SS_CTL1_CM_RESTORE_TIME, + ctl1 & PCI_L1SS_CTL1_CM_RESTORE_TIME); /* Program LTR_L1.2_THRESHOLD time in both ports */ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, PCI_L1SS_CTL1_LTR_L12_TH_VALUE | PCI_L1SS_CTL1_LTR_L12_TH_SCALE, - ctl1); + ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, PCI_L1SS_CTL1_LTR_L12_TH_VALUE | PCI_L1SS_CTL1_LTR_L12_TH_SCALE, - ctl1); + ctl1 & (PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); if (pl1_2_enables || cl1_2_enables) { 
pci_clear_and_set_config_dword(parent, From 4b6886ca8114797ba80afe82ea67f877cf2f26fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Mon, 2 Mar 2026 13:32:05 +0000 Subject: [PATCH 471/554] power: supply: max17042: avoid overflow when determining health MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9a44949da669708f19d29141e65b3ac774d08f5a upstream. If vmax has the default value of INT_MAX (e.g. because not specified in DT), battery health is reported as over-voltage. This is because adding any value to vmax (the vmax tolerance in this case) causes it to wrap around, making it negative and smaller than the measured battery voltage. Avoid that by using size_add(). Fixes: edd4ab055931 ("power: max17042_battery: add HEALTH and TEMP_* properties support") Cc: stable@vger.kernel.org Signed-off-by: André Draszik Link: https://patch.msgid.link/20260302-max77759-fg-v3-6-3c5f01dbda23@linaro.org Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/max17042_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index acea176101fa8..a32ff503f45f7 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -201,7 +201,7 @@ static int max17042_get_battery_health(struct max17042_chip *chip, int *health) goto out; } - if (vbatt > chip->pdata->vmax + MAX17042_VMAX_TOLERANCE) { + if (vbatt > size_add(chip->pdata->vmax, MAX17042_VMAX_TOLERANCE)) { *health = POWER_SUPPLY_HEALTH_OVERVOLTAGE; goto out; } From 2546fb8c9acc8c7512ed4339ce2a982cb7407065 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 11 Mar 2026 19:13:31 +0530 Subject: [PATCH 472/554] powerpc/xive: fix kmemleak caused by incorrect chip_data lookup commit 6771c54728c278bf1e4bfdab4fddbbb186e33498 upstream. 
The kmemleak reports the following memory leak: Unreferenced object 0xc0000002a7fbc640 (size 64): comm "kworker/8:1", pid 540, jiffies 4294937872 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 09 04 00 04 00 00 ................ 00 00 a7 81 00 00 0a c0 00 00 08 04 00 04 00 00 ................ backtrace (crc 177d48f6): __kmalloc_cache_noprof+0x520/0x730 xive_irq_alloc_data.constprop.0+0x40/0xe0 xive_irq_domain_alloc+0xd0/0x1b0 irq_domain_alloc_irqs_parent+0x44/0x6c pseries_irq_domain_alloc+0x1cc/0x354 irq_domain_alloc_irqs_parent+0x44/0x6c msi_domain_alloc+0xb0/0x220 irq_domain_alloc_irqs_locked+0x138/0x4d0 __irq_domain_alloc_irqs+0x8c/0xfc __msi_domain_alloc_irqs+0x214/0x4d8 msi_domain_alloc_irqs_all_locked+0x70/0xf8 pci_msi_setup_msi_irqs+0x60/0x78 __pci_enable_msix_range+0x54c/0x98c pci_alloc_irq_vectors_affinity+0x16c/0x1d4 nvme_pci_enable+0xac/0x9c0 [nvme] nvme_probe+0x340/0x764 [nvme] This occurs when allocating MSI-X vectors for an NVMe device. During allocation the XIVE code creates a struct xive_irq_data and stores it in irq_data->chip_data. When the MSI-X irqdomain is later freed, xive_irq_free_data() is responsible for retrieving this structure and freeing it. However, after commit cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers"), xive_irq_free_data() retrieves the chip_data using irq_get_chip_data(), which looks up the data through the child domain. This is incorrect because the XIVE-specific irq data is associated with the XIVE (parent) domain. As a result the lookup fails and the allocated struct xive_irq_data is never freed, leading to the kmemleak report shown above. Fix this by retrieving the irq_data from the correct domain using irq_domain_get_irq_data() and then accessing the chip_data via irq_data_get_irq_chip_data(). 
Cc: stable@vger.kernel.org Fixes: cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") Signed-off-by: Nilay Shroff Tested-by: Venkat Rao Bagalkote Reviewed-by: Nam Cao Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260311134336.326996-1-nilay@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xive/common.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 8d0123b0ae841..6bc373cfef8c6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1038,13 +1038,19 @@ static struct xive_irq_data *xive_irq_alloc_data(unsigned int virq, irq_hw_numbe return xd; } -static void xive_irq_free_data(unsigned int virq) +static void xive_irq_free_data(struct irq_domain *domain, unsigned int virq) { - struct xive_irq_data *xd = irq_get_chip_data(virq); + struct xive_irq_data *xd; + struct irq_data *data = irq_domain_get_irq_data(domain, virq); + + if (!data) + return; + xd = irq_data_get_irq_chip_data(data); if (!xd) return; - irq_set_chip_data(virq, NULL); + + irq_domain_reset_irq_data(data); xive_cleanup_irq_data(xd); kfree(xd); } @@ -1304,7 +1310,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - xive_irq_free_data(virq); + xive_irq_free_data(d, virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, @@ -1442,7 +1448,7 @@ static void xive_irq_domain_free(struct irq_domain *domain, pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); for (i = 0; i < nr_irqs; i++) - xive_irq_free_data(virq + i); + xive_irq_free_data(domain, virq + i); } #endif From 44905248dd87ee233c8a2fcc23cd6909b92cf6d1 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:55 +0800 Subject: [PATCH 473/554] perf/x86/intel: Always reprogram ACR events to 
prevent stale masks commit 8ba0b706a485b1e607594cf4210786d517ad1611 upstream. Members of an ACR group are logically linked via a bitmask of their hardware counter indices. If some members of the group are assigned new hardware counters during rescheduling, even events that keep their original counter index must be updated with a new mask. Without this, an event will continue to use a stale acr_mask that references the old indices of its group peers. Ensure all ACR events are reprogrammed during the scheduling path to maintain consistency across the group. Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-3-dapeng1.mi@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8a0cd2ebb60db..330c2ba41f076 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1281,13 +1281,16 @@ int x86_perf_rdpmc_index(struct perf_event *event) return event->hw.event_base_rdpmc; } -static inline int match_prev_assignment(struct hw_perf_event *hwc, +static inline int match_prev_assignment(struct perf_event *event, struct cpu_hw_events *cpuc, int i) { + struct hw_perf_event *hwc = &event->hw; + return hwc->idx == cpuc->assign[i] && - hwc->last_cpu == smp_processor_id() && - hwc->last_tag == cpuc->tags[i]; + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i] && + !is_acr_event_group(event); } static void x86_pmu_start(struct perf_event *event, int flags); @@ -1333,7 +1336,7 @@ static void x86_pmu_enable(struct pmu *pmu) * - no other event has used the counter since */ if (hwc->idx == -1 || - match_prev_assignment(hwc, cpuc, i)) + match_prev_assignment(event, cpuc, i)) continue; /* @@ -1354,7 +1357,7 @@ static void x86_pmu_enable(struct pmu 
*pmu) event = cpuc->event_list[i]; hwc = &event->hw; - if (!match_prev_assignment(hwc, cpuc, i)) + if (!match_prev_assignment(event, cpuc, i)) x86_assign_hw_event(event, cpuc, i); else if (i < n_running) continue; From 61df14f306f153bffa2f3c74a94ff5a85c99fa39 Mon Sep 17 00:00:00 2001 From: Kai Aizen Date: Tue, 7 Apr 2026 12:20:22 +0300 Subject: [PATCH 474/554] RDMA/ionic: bound node_desc sysfs read with %.64s commit 654a27f25530d052eeedf086e6c3e2d585c203bd upstream. node_desc[64] in struct ib_device is not guaranteed to be NUL- terminated. The core IB sysfs handler uses "%.64s" for exactly this reason (drivers/infiniband/core/sysfs.c:1307), since node_desc_store() performs a raw memcpy of up to IB_DEVICE_NODE_DESC_MAX bytes with no NUL termination: memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); If exactly 64 bytes are written via the node_desc sysfs file, the array contains no NUL byte. The ionic hca_type_show() handler uses unbounded "%s" and will read past the end of node_desc into adjacent fields of struct ib_device until it encounters a NUL. ionic supports IB_DEVICE_MODIFY_NODE_DESC, so this is triggerable by userspace. Match the core handler and bound the format specifier. 
Cc: stable@vger.kernel.org Fixes: 2075bbe8ef03 ("RDMA/ionic: Register device ops for miscellaneous functionality") Link: https://patch.msgid.link/r/CALynFi7NAbhDCt1tdaDbf6TnLvAqbaHa6-Wqf6OkzREbA_PAfg@mail.gmail.com Signed-off-by: Kai Aizen Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index bd4c73e530d08..0382a64839d26 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device, struct ionic_ibdev *dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); - return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc); + return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); From d886bcdcd00d7fca1ccbde2466d356ee145c416b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:34 -0300 Subject: [PATCH 475/554] RDMA/ionic: Fix typo in format string commit 70f780edcd1e86350202d8a409de026b2d2e2067 upstream. Applying the corrupted patch by hand mangled the format string, put the s in the right place. 
Cc: stable@vger.kernel.org Fixes: 654a27f25530 ("RDMA/ionic: bound node_desc sysfs read with %.64s") Link: https://patch.msgid.link/r/1-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reported-by: Brad Spengler Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 0382a64839d26..73a616ae35023 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device, struct ionic_ibdev *dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); - return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); + return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); From ab64c63b460bbd0521480bf90d5695783f5e66bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:40 -0300 Subject: [PATCH 476/554] RDMA/mana: Fix error unwind in mana_ib_create_qp_rss() commit 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f upstream. Sashiko points out that mana_ib_cfg_vport_steering() is leaked, the normal destroy path cleans it up. 
Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/7-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mana/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 960878b53da85..d688395b44982 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -236,13 +236,15 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata create rss-qp, %d\n", ret); - goto fail; + goto err_disable_vport_rx; } kfree(mana_ind_table); return 0; +err_disable_vport_rx: + mana_disable_vport_rx(mpc); fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i]; From bb9cb36eaefa4dcb7c0d9f7a01e5c739abdd53a8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:39 -0300 Subject: [PATCH 477/554] RDMA/mana: Fix mana_destroy_wq_obj() cleanup in mana_ib_create_qp_rss() commit 34ecf795692ee57c393109f4a24ccc313091e137 upstream. Sashiko points out there are two bugs here in the error unwind flow, both related to how the WQ table is unwound. First there is a double i-- on the first failure path due to the while loop having a i--, remove it. Second if mana_ib_install_cq_cb() fails then mana_create_wq_obj() is not undone due to the above i--. 
Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/6-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mana/qp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index d688395b44982..67177d92f11a1 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -194,11 +194,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) { - /* Do cleanup starting with index i-1 */ - i--; + if (ret) goto fail; - } /* The GDMA regions are now owned by the WQ object */ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; @@ -218,8 +215,10 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, /* Create CQ table entry */ ret = mana_ib_install_cq_cb(mdev, cq); - if (ret) + if (ret) { + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); goto fail; + } } resp.num_entries = i; From 9ef65af26b2a6738bf15812042e84b3112402d3a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:38 -0300 Subject: [PATCH 478/554] RDMA/mana: Remove user triggerable WARN_ON() in mana_ib_create_qp_rss() commit 159f2efabc89d3f931d38f2d35876535d4abf0a3 upstream. Sashiko points out that the user can specify WQs sharing the same CQ as a part of the uAPI and this will trigger the WARN_ON() then go on to corrupt the kernel. Just reject it outright and fail the QP creation. 
Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/5-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mana/cq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index 7600412b0739f..da93cbd57e572 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -144,8 +144,9 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs) return -EINVAL; - /* Create CQ table entry */ - WARN_ON(gc->cq_table[cq->queue.id]); + /* Create CQ table entry, sharing a CQ between WQs is not supported */ + if (gc->cq_table[cq->queue.id]) + return -EINVAL; if (cq->queue.kmem) gdma_cq = cq->queue.kmem; else From 012796f9541fcd0c1fa8ae4da7eb4d83931ef838 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:37 -0300 Subject: [PATCH 479/554] RDMA/mana: Validate rx_hash_key_len commit 6dd2d4ad9c8429523b1c220c5132bd551c006425 upstream. Sashiko points out that rx_hash_key_len comes from a uAPI structure and is blindly passed to memcpy, allowing the userspace to trash kernel memory. Bounds check it so the memcpy cannot overflow. 
Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/4-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mana/qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 67177d92f11a1..f00bf3b015e73 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); + if (rx_hash_key_len > sizeof(req->hashkey)) + return -EINVAL; + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) From 1e2a44875b6afb4add1115f7f3351dcbeb6f273d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:45 -0300 Subject: [PATCH 480/554] RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event() commit c9341307ea16b9395c2e4c9c94d8499d91fe31d0 upstream. Sashiko points out the radix_tree itself is RCU safe, but nothing ever frees the mlx4_srq struct with RCU, and it isn't even accessed within the RCU critical section. It also will crash if an event is delivered before the srq object is finished initializing. Use the spinlock since it isn't easy to make RCU work, use refcount_inc_not_zero() to protect against partially initialized objects, and order the refcount_set() to be after the srq is fully initialized. 
Cc: stable@vger.kernel.org Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5 Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/srq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index dd890f5d7b725..8711689120f30 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; + unsigned long flags; - rcu_read_lock(); + spin_lock_irqsave(&srq_table->lock, flags); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - rcu_read_unlock(); - if (srq) - refcount_inc(&srq->refcount); - else { + if (!srq || !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; + spin_unlock_irqrestore(&srq_table->lock, flags); + if (!srq) { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - refcount_set(&srq->refcount, 1); init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); return 0; From e01b8c9286c470b71a38acd320106f2c4f2826a1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:44 -0300 Subject: [PATCH 481/554] RDMA/mlx4: Fix resource leak on error in mlx4_ib_create_srq() commit c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c upstream. Sashiko points out that mlx4_srq_alloc() was not undone during error unwind, add the missing call to mlx4_srq_free(). 
Cc: stable@vger.kernel.org Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=8 Link: https://patch.msgid.link/r/11-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/srq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index c4cf91235eee3..68e8b04c53880 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -193,13 +193,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; - goto err_wrid; + goto err_srq; } init_attr->attr.max_wr = srq->msrq.max - 1; return 0; +err_srq: + mlx4_srq_free(dev->dev, &srq->msrq); err_wrid: if (udata) mlx4_ib_db_unmap_user(ucontext, &srq->db); From b087913ae88256df66620f7ba0a9776716aeef7e Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Fri, 24 Apr 2026 13:51:02 +0800 Subject: [PATCH 482/554] RDMA/mlx5: Fix error path fall-through in mlx5_ib_dev_res_srq_init() commit c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 upstream. mlx5_ib_dev_res_srq_init() allocates two SRQs, s0 and s1. When ib_create_srq() fails for s1, the error branch destroys s0 but falls through and unconditionally assigns the freed s0 and the ERR_PTR s1 to devr->s0 and devr->s1. This leads to several problems: the lock-free fast path checks "if (devr->s1) return 0;" and treats the ERR_PTR as already initialised; users in mlx5_ib_create_qp() dereference the freed SRQ or ERR_PTR via to_msrq(devr->s0)->msrq.srqn; and mlx5_ib_dev_res_cleanup() dereferences the ERR_PTR and double-frees s0 on teardown. Fix by adding the same `goto unlock` in the s1 failure path. 
Cc: stable@vger.kernel.org Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created") Link: https://patch.msgid.link/r/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2@SYBPR01MB7881.ausprd01.prod.outlook.com Reported-by: Yuhao Jiang Signed-off-by: Junrui Luo Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5899bd5cb1623..984c258ab0a89 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3181,6 +3181,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) "Couldn't create SRQ 1 for res init, err=%pe\n", s1); ib_destroy_srq(s0); + goto unlock; } devr->s0 = s0; From 8832626a483439e207734e027afff322ccdf726e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:42 -0300 Subject: [PATCH 483/554] RDMA/ocrdma: Don't NULL deref uctx on errors in ocrdma_copy_pd_uresp() commit 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba upstream. Sashiko points out that pd->uctx isn't initialized until late in the function so all these error flow references are NULL and will crash. Use the uctx that isn't NULL. 
Cc: stable@vger.kernel.org Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/9-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 46d911fd38dee..954a50d5c34d7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); + ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: - ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); + ocrdma_del_mmap(uctx, db_page_addr, db_page_size); return status; } From 105bf79a23b85cf3a761d18a4f3e10ce88526bc1 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sat, 18 Apr 2026 12:21:41 -0400 Subject: [PATCH 484/554] RDMA/rxe: Reject non-8-byte ATOMIC_WRITE payloads commit 1114c87aa6f195cf07da55a27b2122ae26557b26 upstream. atomic_write_reply() at drivers/infiniband/sw/rxe/rxe_resp.c unconditionally dereferences 8 bytes at payload_addr(pkt): value = *(u64 *)payload_addr(pkt); check_rkey() previously accepted an ATOMIC_WRITE request with pktlen == resid == 0 because the length validation only compared pktlen against resid. A remote initiator that sets the RETH length to 0 therefore reaches atomic_write_reply() with a zero-byte logical payload, and the responder reads sizeof(u64) bytes from past the logical end of the packet into skb->head tailroom, then writes those 8 bytes into the attacker's MR via rxe_mr_do_atomic_write(). 
That is a remote disclosure of 4 bytes of kernel tailroom per probe (the other 4 bytes are the packet's own trailing ICRC). IBA oA19-28 defines ATOMIC_WRITE as exactly 8 bytes. Anything else is protocol-invalid. Hoist a strict length check into check_rkey() so the responder never reaches the unchecked dereference, and keep the existing WRITE-family length logic for the normal RDMA WRITE path. Reproduced on mainline with an unmodified rxe driver: a sustained zero-length ATOMIC_WRITE probe repeatedly leaks adjacent skb head-buffer bytes into the attacker's MR, including recognisable kernel strings and partial kernel-direct-map pointer words. With this patch applied the responder rejects the PDU and the MR stays all-zero. Cc: stable@vger.kernel.org Fixes: 034e285f8b99 ("RDMA/rxe: Make responder support atomic write on RC service") Link: https://patch.msgid.link/r/20260418162141.3610201-1-michael.bommarito@gmail.com Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_resp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 711f73e0bbb1c..09ba21d0f3c4f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -526,7 +526,19 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } skip_check_range: - if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + /* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes. + * Reject any other length before the responder reads + * sizeof(u64) bytes from payload_addr(pkt); a shorter + * payload would read past the logical end of the packet + * into skb->head tailroom. 
+ */ + if (resid != sizeof(u64) || pktlen != sizeof(u64) || + bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err; + } + } else if (pkt->mask & RXE_WRITE_MASK) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; From 006a3a5f75345c6a0dbf13fd3ee01406e93b6733 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 14 Apr 2026 07:15:55 -0400 Subject: [PATCH 485/554] RDMA/rxe: Reject unknown opcodes before ICRC processing commit 4c6f86d85d03cdb33addce86aa69aa795ca6c47a upstream. Even after applying commit 7244491dab34 ("RDMA/rxe: Validate pad and ICRC before payload_size() in rxe_rcv"), a single unauthenticated UDP packet can still trigger panic. That patch handled payload_size() underflow only for valid opcodes with short packets, not for packets carrying an unknown opcode. The unknown-opcode OOB read described below predates that commit and reaches back to the initial Soft RoCE driver. The check added there reads pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE where header_size(pkt) expands to rxe_opcode[pkt->opcode].length. The rxe_opcode[] array has 256 entries but is only populated for defined IB opcodes; any other entry (for example opcode 0xff) is zero-initialized, so length == 0 and the check degenerates to pkt->paylen < 0 + bth_pad(pkt) + RXE_ICRC_SIZE which does not constrain pkt->paylen enough. rxe_icrc_hdr() then computes rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES which underflows when length == 0 and passes a huge value to rxe_crc32(), causing an out-of-bounds read of the skb payload. Reproduced on v7.0-rc7 with that fix applied, QEMU/KVM with CONFIG_RDMA_RXE=y and CONFIG_KASAN=y, after rdma link add rxe0 type rxe netdev eth0 A single 48-byte UDP packet to port 4791 with BTH opcode=0xff and QPN=IB_MULTICAST_QPN triggers: BUG: KASAN: slab-out-of-bounds in crc32_le+0x115/0x170 Read of size 1 at addr ... 
The buggy address is located 0 bytes to the right of allocated 704-byte region Call Trace: crc32_le+0x115/0x170 rxe_icrc_hdr.isra.0+0x226/0x300 rxe_icrc_check+0x13f/0x3a0 rxe_rcv+0x6e1/0x16e0 rxe_udp_encap_recv+0x20a/0x320 udp_queue_rcv_one_skb+0x7ed/0x12c0 Subsequent packets with the same shape fault on unmapped memory and panic the kernel. The trigger requires only module load and "rdma link add"; no QP, no connection, and no authentication. Fix this by rejecting packets whose opcode has no rxe_opcode[] entry, detected via the zero mask or zero length, before any length arithmetic runs. Cc: stable@vger.kernel.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://patch.msgid.link/r/20260414111555.3386793-1-michael.bommarito@gmail.com Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Michael Bommarito Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_recv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f79214738c2b8..2d5e701ff961a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb) pkt->qp = NULL; pkt->mask |= rxe_opcode[pkt->opcode].mask; + /* + * Unknown opcodes have a zero-initialized rxe_opcode[] entry, so + * both mask and length are 0. Reject them before any length math: + * rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES + * and pass the underflowed value to rxe_crc32(), producing an + * out-of-bounds read. 
+ */ + if (unlikely(!rxe_opcode[pkt->opcode].mask || + !rxe_opcode[pkt->opcode].length)) + goto drop; + if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE)) goto drop; From 0c63333ff97bd1275294fd12840a0efe9d7a4c59 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:43 -0300 Subject: [PATCH 486/554] RDMA/vmw_pvrdma: Fix double free on pvrdma_alloc_ucontext() error path commit e38e86995df27f1f854063dab1f0c6a513db3faf upstream. Sashiko points out that pvrdma_uar_free() is already called within pvrdma_dealloc_ucontext(), so calling it before triggers a double free. Cc: stable@vger.kernel.org Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/10-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index bcd43dc30e21c..c7c2b41060e52 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.qp_tab_size = vdev->dsr->caps.max_qp; ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) { - pvrdma_uar_free(vdev, &context->uar); + /* pvrdma_dealloc_ucontext() also frees the UAR */ pvrdma_dealloc_ucontext(&context->ibucontext); return -EFAULT; } From a8f4a82e5629cdf94c55a39c70a62310f73b46ce Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 30 Apr 2026 10:27:47 +0100 Subject: [PATCH 487/554] sched_ext: idle: Recheck prev_cpu after narrowing allowed mask commit b34c82777a2c0648ee053595f4b290fd5249b093 upstream. 
scx_select_cpu_dfl() narrows @allowed to @cpus_allowed & @p->cpus_ptr when the BPF caller supplies a @cpus_allowed that differs from @p->cpus_ptr and @p doesn't have full affinity. However, @is_prev_allowed was computed against the original (wider) @cpus_allowed, so the prev_cpu fast paths could pick a @prev_cpu that is in @cpus_allowed but not in @p->cpus_ptr, violating the intended invariant that the returned CPU is always usable by @p. The kernel masks this via the SCX_EV_SELECT_CPU_FALLBACK fallback, but the behavior contradicts the documented contract. Move the @is_prev_allowed evaluation past the narrowing block so it tests against the final @allowed mask. Fixes: ee9a4e92799d ("sched_ext: idle: Properly handle invalid prev_cpu during idle selection") Cc: stable@vger.kernel.org # v6.16+ Assisted-by: Claude Signed-off-by: David Carlier Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/sched/ext_idle.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index f880e3dcadfa6..d16a1de078656 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -459,12 +459,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, preempt_disable(); - /* - * Check whether @prev_cpu is still within the allowed set. If not, - * we can still try selecting a nearby CPU. - */ - is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); - /* * Determine the subset of CPUs usable by @p within @cpus_allowed. */ @@ -481,6 +475,12 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, } } + /* + * Check whether @prev_cpu is still within the allowed set. If not, + * we can still try selecting a nearby CPU. + */ + is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); + /* * This is necessary to protect llc_cpus. 
*/ From 2efd6bd0e9b19fc0ac75a5cad74ab2017319ca8c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:58 +0200 Subject: [PATCH 488/554] selftests: mptcp: check output: catch cmd errors commit 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac upstream. Using '${?}' inside the if-statement to check the returned value from the command that was evaluated as part of the if-statement is not correct: here, '${?}' will be linked to the previous instruction, not the one that is expected here (${cmd}). Instead, simply mark the error, except if an error is expected. If that's the case, 1 can be passed as the 4th argument of this helper. Three checks from pm_netlink.sh expect an error. While at it, improve the error message when the command unexpectedly fails or succeeds. Note that we could expect a specific returned value, but the checks currently expecting an error can be used with 'ip mptcp' or 'pm_nl_ctl', and these two tools don't return the same error code. Fixes: 2d0c1d27ea4e ("selftests: mptcp: add mptcp_lib_check_output helper") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-10-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++++++++------ tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 ++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index f4388900016ab..8b5c2285ad73c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -430,20 +430,24 @@ mptcp_lib_wait_local_port_listen() { wait_local_port_listen "${@}" "tcp" } +# $1: error file, $2: cmd, $3: expected msg, [$4: expected error] mptcp_lib_check_output() { local err="${1}" local cmd="${2}" local 
expected="${3}" + local exp_error="${4:-0}" local cmd_ret=0 local out - if ! out=$(${cmd} 2>"${err}"); then - cmd_ret=${?} - fi + out=$(${cmd} 2>"${err}") || cmd_ret=1 - if [ ${cmd_ret} -ne 0 ]; then - mptcp_lib_pr_fail "command execution '${cmd}' stderr" - cat "${err}" + if [ "${cmd_ret}" != "${exp_error}" ]; then + mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:" + if [ "${exp_error}" = 0 ]; then + cat "${err}" + else + echo "${out}" + fi return 2 elif [ "${out}" = "${expected}" ]; then return 0 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 123d9d7a0278c..b69f30fcb91e9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -122,10 +122,12 @@ check() local cmd="$1" local expected="$2" local msg="$3" + local exp_error="$4" local rc=0 mptcp_lib_print_title "$msg" - mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" || + rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" ret=${KSFT_FAIL} @@ -158,13 +160,13 @@ check "show_endpoints" \ "3,10.0.1.3,signal backup")" "dump addrs" del_endpoint 2 -check "get_endpoint 2" "" "simple del addr" +check "get_endpoint 2" "" "simple del addr" 1 check "show_endpoints" \ "$(format_endpoints "1,10.0.1.1" \ "3,10.0.1.3,signal backup")" "dump addrs after del" add_endpoint 10.0.1.3 2>/dev/null -check "get_endpoint 4" "" "duplicate addr" +check "get_endpoint 4" "" "duplicate addr" 1 add_endpoint 10.0.1.4 flags signal check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" @@ -173,7 +175,7 @@ for i in $(seq 5 9); do add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" -check "get_endpoint 10" "" "above hard addr limit" +check "get_endpoint 10" "" "above hard addr 
limit" 1 del_endpoint 9 for i in $(seq 10 255); do From 2146145d6dd6827e5ea4bb710f5b0473bf4ea1cf Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:59 +0200 Subject: [PATCH 489/554] selftests: mptcp: pm: restrict 'unknown' check to pm_nl_ctl commit 53705ddfa18408f8e1f064331b6387509fa19f7f upstream. When pm_netlink.sh is executed with '-i', 'ip mptcp' is used instead of 'pm_nl_ctl'. IPRoute2 doesn't support the 'unknown' flag, which has only been added to 'pm_nl_ctl' for this specific check: to ensure that the kernel ignores such unsupported flag. No reason to add this flag to 'ip mptcp'. Then, this check should be skipped when 'ip mptcp' is used. Fixes: 0cef6fcac24d ("selftests: mptcp: ip_mptcp option for more scripts") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-11-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index b69f30fcb91e9..04594dfc22b13 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -194,9 +194,13 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" -add_endpoint 10.0.1.1 flags unknown -check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" -flush_endpoint +# "unknown" flag is only supported by pm_nl_ctl +if ! 
mptcp_lib_is_ip_mptcp; then + add_endpoint 10.0.1.1 flags unknown + check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \ + "ignore unknown flags" + flush_endpoint +fi set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" From 4f269fc3c73af49aa3ee7769ab5d3e94fc7bba57 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 27 Apr 2026 21:54:35 +0200 Subject: [PATCH 490/554] mptcp: fastclose msk when linger time is 0 commit f14d6e9c3678a067f304abba561e0c5446c7e845 upstream. The SO_LINGER socket option has been supported for a while with MPTCP sockets [1], but it didn't cause the equivalent of a TCP reset as expected when enabled and its time was set to 0. This was causing some behavioural differences with TCP where some connections were not promptly stopped as expected. To fix that, an extra condition is checked at close() time before sending an MP_FASTCLOSE, the MPTCP equivalent of a TCP reset. Note that backporting up to [1] will be difficult as more changes are needed to be able to send MP_FASTCLOSE. It seems better to stop at [2], which was supposed to already imitate TCP. Validated with MPTCP packetdrill tests [3]. 
Fixes: 268b12387460 ("mptcp: setsockopt: support SO_LINGER") [1] Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios") [2] Cc: stable@vger.kernel.org Reported-by: Lance Tuller Closes: https://github.com/lance0/xfr/pull/67 Link: https://github.com/multipath-tcp/packetdrill/pull/196 [3] Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-3-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c805d36fe50d5..1dbe6c8a8b8c5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3162,7 +3162,8 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_data_avail(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0 || + (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ From fa26c8deed612e95c81425bc881ae1b4fa4ece93 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Fri, 1 May 2026 21:35:34 +0200 Subject: [PATCH 491/554] mptcp: use MPJoinSynAckHMacFailure for SynAck HMAC failure commit c4a99a921949cddc590b22bb14eeb23dffcc3ba6 upstream. In subflow_finish_connect(), HMAC validation of the server's HMAC in SYN/ACK + MP_JOIN increments MPTCP_MIB_JOINACKMAC ("HMAC was wrong on ACK + MP_JOIN") on failure. The function processes the SYN/ACK, not the ACK; the matching MPTCP_MIB_JOINSYNACKMAC counter ("HMAC was wrong on SYN/ACK + MP_JOIN") exists but is not incremented anywhere in the tree. The mirror site on the server, subflow_syn_recv_sock(), already uses JOINACKMAC correctly for ACK HMAC failure. Use JOINSYNACKMAC at the SYN/ACK validation site so each counter reflects the packet whose HMAC actually failed. 
Suggested-by: Matthieu Baerts (NGI0) Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") Cc: stable@vger.kernel.org Signed-off-by: Shardul Bankar Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-1-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 90927f6cad26a..2645477e6d6b5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -580,7 +580,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup); if (!subflow_thmac_valid(subflow)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } From e74f33c8a2c79824f7725c09162fa01eb7e0728a Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Fri, 1 May 2026 21:35:35 +0200 Subject: [PATCH 492/554] mptcp: use MPTCP_RST_EMPTCP for ACK HMAC validation failure commit a6da02d4c00fdda2417e42ad2b762a9209e6cc49 upstream. When HMAC validation fails on a received ACK + MP_JOIN in subflow_syn_recv_sock(), the subflow is reset with reason MPTCP_RST_EPROHIBIT ("Administratively prohibited"). This is incorrect: HMAC validation failure is an MPTCP protocol-level error, not an administrative policy denial. The mirror site on the client, in subflow_finish_connect(), already uses MPTCP_RST_EMPTCP ("MPTCP-specific error") for the same kind of HMAC failure on the SYN/ACK + MP_JOIN. Use the same reason on the server side for symmetry and accuracy. 
Suggested-by: Matthieu Baerts (NGI0) Fixes: 443041deb5ef ("mptcp: fix NULL pointer in can_accept_new_subflow") Cc: stable@vger.kernel.org Signed-off-by: Shardul Bankar Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-2-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2645477e6d6b5..3226fef7237ef 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -907,7 +907,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } From c7362eef3b70bc68255e0a7336308417d7c40bf1 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Mon, 27 Apr 2026 21:54:33 +0200 Subject: [PATCH 493/554] mptcp: sockopt: set timestamp flags on subflow socket, not msk commit 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 upstream. Both mptcp_setsockopt_sol_socket_tstamp() and mptcp_setsockopt_sol_socket_timestamping() iterate over subflows, acquire the subflow socket lock, but then erroneously pass the MPTCP msk socket to sock_set_timestamp() / sock_set_timestamping() instead of the subflow ssk. As a result, the timestamp flags are set on the wrong socket and have no effect on the actual subflows. Pass ssk instead of sk to both helpers. 
Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") Cc: stable@vger.kernel.org Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-1-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/sockopt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index a28a483858852..40fe4e93d900f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -161,7 +161,7 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamp(sk, optname, !!val); + sock_set_timestamp(ssk, optname, !!val); unlock_sock_fast(ssk, slow); } @@ -237,7 +237,7 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, timestamping); + sock_set_timestamping(ssk, optname, timestamping); unlock_sock_fast(ssk, slow); } From e083d1d8bbec1976dedcdbbe17bee5ee96df4855 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 1 May 2026 21:35:37 +0200 Subject: [PATCH 494/554] mptcp: sockopt: increase seq in mptcp_setsockopt_all_sf commit 70ece9d7021c54cf40c72b31b066e9088f5f75f5 upstream. mptcp_setsockopt_all_sf() was missing a call to sockopt_seq_inc(). This is required not to cause missing synchronization for newer subflows created later on. This helper is called each time a socket option is set on subflows, and future ones will need to inherit this option after their creation. 
Fixes: 51c5fd09e1b4 ("mptcp: add TCP_MAXSEG sockopt support") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-4-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/sockopt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 40fe4e93d900f..7e131c44945e9 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, if (ret) break; } + + if (!ret) + sockopt_seq_inc(msk); + return ret; } From e6ad28d9d5bff3f7eb8212be8658fae31f3d273e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 May 2026 21:35:36 +0200 Subject: [PATCH 495/554] mptcp: fix rx timestamp corruption on fastopen commit 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 upstream. The skb cb offset containing the timestamp presence flag is cleared before loading such information. Cache such value before MPTCP CB initialization. 
Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-3-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/fastopen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index b9e4511979028..c8aa5426f1af9 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + bool has_rxtstamp; /* on early fallback the subflow context is deleted by * subflow_syn_recv_sock() @@ -39,12 +40,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf */ tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; - MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); From 8a005fe451c73fd2b3d1faa5643c11e6bd07acfc Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Mon, 27 Apr 2026 21:54:34 +0200 Subject: [PATCH 496/554] mptcp: fix scheduling with atomic in timestamp sockopt commit b5c52908d52c6c8eb8933264aa6087a0600fd892 upstream. Using lock_sock_fast() (atomic context) around sock_set_timestamp() and sock_set_timestamping() is unsafe, as both helpers can sleep. Replace lock_sock_fast() with sleepable lock_sock()/release_sock() to avoid scheduling while atomic panic. 
Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") Cc: stable@vger.kernel.org Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260420093343.16443-1-gang.yan@linux.dev Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-2-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/sockopt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 7e131c44945e9..de12b3c548edd 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); sock_set_timestamp(ssk, optname, !!val); - unlock_sock_fast(ssk, slow); + release_sock(ssk); } release_sock(sk); @@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); sock_set_timestamping(ssk, optname, timestamping); - unlock_sock_fast(ssk, slow); + release_sock(ssk); } release_sock(sk); From 6f5ba49da8b1ab9e0edcc8a69bdc074f1794b5ab Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:57 +0200 Subject: [PATCH 497/554] mptcp: pm: prio: skip closed subflows commit 166b78344031bf7ac9f55cb5282776cfd85f220e upstream. When sending an MP_PRIO, closed subflows need to be skipped. This fixes the case where the initial subflow got closed, re-opened later, then an MP_PRIO is needed for the same local address. Note that explicit MP_PRIO cannot be sent during the 3WHS, so it is fine to use __mptcp_subflow_active(). 
Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") Cc: stable@vger.kernel.org Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-9-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index ec1c570bc4525..da2d1af03629f 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -283,6 +283,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; + if (!__mptcp_subflow_active(subflow)) + continue; + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; From 07c2f3c29a8a03e80cdc073838448f58c5182855 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:49 +0200 Subject: [PATCH 498/554] mptcp: pm: kernel: correctly retransmit ADD_ADDR ID 0 commit b12014d2d36eaed4e4bec5f1ac7e91110eeb100d upstream. When adding the ADD_ADDR to the list, the address including the IP, port and ID are copied. On the other hand, when the endpoint corresponds to the one from the initial subflow, the ID is set to 0, as specified by the MPTCP protocol. The issue is that the ID was reset after having copied the ID in the ADD_ADDR entry. So the retransmission was done, but using a different ID than the initial one. 
Fixes: 8b8ed1b429f8 ("mptcp: pm: reuse ID 0 after delete and re-add") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-1-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_kernel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 4494cf4c38282..a31a845e5cd42 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -336,6 +336,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < endp_signal_max) { + u8 endp_id; + /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -349,19 +351,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (!select_signal_address(pernet, msk, &local)) goto subflow; + /* Special case for ID0: set the correct ID */ + endp_id = local.addr.id; + if (endp_id == msk->mpc_endpoint_id) + local.addr.id = 0; + /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + __clear_bit(endp_id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_addr_send_ack(msk); From cffc6fc446a27f3eb35779b6285089e6849259b7 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:50 +0200 Subject: [PATCH 499/554] mptcp: pm: ADD_ADDR rtx: allow ID 0 commit 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 upstream. 
ADD_ADDR can be sent for the ID 0, which corresponds to the local address and port linked to the initial subflow. Indeed, this address could be removed, and re-added later on, e.g. what is done in the "delete re-add signal" MPTCP Join selftests. So no reason to ignore it. Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-2-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index da2d1af03629f..8852aef5e6187 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -350,9 +350,6 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (inet_sk_state_load(sk) == TCP_CLOSE) return; - if (!entry->addr.id) - return; - if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; From 2ad56e434199ca24a812bb353667aa1c3860f513 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:51 +0200 Subject: [PATCH 500/554] mptcp: pm: ADD_ADDR rtx: fix potential data-race commit 5cd6e0ad79d2615264f63929f8b457ad97ae550d upstream. This mptcp_pm_add_timer() helper is executed as a timer callback in softirq context. To avoid any data races, the socket lock needs to be held with bh_lock_sock(). If the socket is in use, retry again soon after, similar to what is done with the keepalive timer. 
Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-3-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8852aef5e6187..5cb62ab42cbe1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -350,6 +350,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (inet_sk_state_load(sk) == TCP_CLOSE) return; + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + sk_reset_timer(sk, timer, jiffies + HZ / 20); + goto out; + } + if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; @@ -378,6 +385,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: + bh_unlock_sock(sk); __sock_put(sk); } From acd3d3562315c99f3c0db16f0fcc5f0306638982 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:52 +0200 Subject: [PATCH 501/554] mptcp: pm: ADD_ADDR rtx: always decrease sk refcount commit 9634cb35af17019baec21ca648516ce376fa10e6 upstream. When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(). It should then be released in all cases at the end. Some (unlikely) checks were returning directly instead of calling sock_put() to decrease the refcount. Jump to a new 'exit' label to call __sock_put() (which will become sock_put() in the next commit) to fix this potential leak. While at it, drop the '!msk' check which cannot happen because it is never reset, and explicitly mark the remaining one as "unlikely". 
Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-4-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 5cb62ab42cbe1..4dcac6135fbfa 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -344,11 +344,8 @@ static void mptcp_pm_add_timer(struct timer_list *timer) pr_debug("msk=%p\n", msk); - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + goto exit; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -386,6 +383,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) out: bh_unlock_sock(sk); +exit: __sock_put(sk); } From b74ad20198652b6b39a761c277ba65ae82b1e107 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:53 +0200 Subject: [PATCH 502/554] mptcp: pm: ADD_ADDR rtx: free sk if last commit b7b9a461569734d33d3259d58d2507adfac107ed upstream. When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(), and released at the end. If at that moment, it was the last reference being held, the sk would not be freed. sock_put() should then be called instead of __sock_put(). But that's not enough: if it is the last reference, sock_put() will call sk_free(), which will end up calling sk_stop_timer_sync() on the same timer, and waiting indefinitely to finish. So it is needed to mark that the timer is done at the end of the timer handler when it has not been rescheduled, not to call sk_stop_timer_sync() on "itself". 
Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-5-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 4dcac6135fbfa..a1edfb74f924f 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; + bool timer_done; struct timer_list add_timer; struct mptcp_sock *sock; struct rcu_head rcu; @@ -340,22 +341,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - unsigned int timeout; + unsigned int timeout = 0; pr_debug("msk=%p\n", msk); + bh_lock_sock(sk); if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) - goto exit; + goto out; - bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. 
*/ - sk_reset_timer(sk, timer, jiffies + HZ / 20); + timeout = HZ / 20; goto out; } if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + timeout = TCP_RTO_MAX / 8; goto out; } @@ -373,8 +374,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + (timeout << entry->retrans_times)); + timeout <<= entry->retrans_times; + else + timeout = 0; spin_unlock_bh(&msk->pm.lock); @@ -382,9 +384,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: + if (timeout) + sk_reset_timer(sk, timer, jiffies + timeout); + else + /* if sock_put calls sk_free: avoid waiting for this timer */ + entry->timer_done = true; bh_unlock_sock(sk); -exit: - __sock_put(sk); + sock_put(sk); } struct mptcp_pm_add_entry * @@ -447,6 +453,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: + add_entry->timer_done = false; timeout = mptcp_adjust_add_addr_timeout(msk); if (timeout) sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); @@ -467,7 +474,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); + if (!entry->timer_done) + sk_stop_timer_sync(sk, &entry->add_timer); kfree_rcu(entry, rcu); } } From 107e956ce5441192946f53d2aea186d9ca0bd745 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:54 +0200 Subject: [PATCH 503/554] mptcp: pm: ADD_ADDR rtx: resched blocked ADD_ADDR quicker commit 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 upstream. When an ADD_ADDR needs to be retransmitted and another one has already been prepared -- e.g. 
multiple ADD_ADDRs have been sent in a row and need to be retransmitted later -- this additional retransmission will need to wait. In this case, the timer was reset to TCP_RTO_MAX / 8, which is ~15 seconds. This delay is unnecessarily long: it should just be rescheduled at the next opportunity, e.g. after the retransmission timeout. Without this modification, some issues can be seen from time to time in the selftests when multiple ADD_ADDRs are sent, and the host takes time to process them, e.g. the "signal addresses, ADD_ADDR timeout" MPTCP Join selftest, especially with a debug kernel config. Note that on older kernels, 'timeout' is not available. It should be enough to replace it by one second (HZ). Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-6-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index a1edfb74f924f..850de47fdae00 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -355,13 +355,8 @@ static void mptcp_pm_add_timer(struct timer_list *timer) goto out; } - if (mptcp_pm_should_add_signal_addr(msk)) { - timeout = TCP_RTO_MAX / 8; - goto out; - } - timeout = mptcp_adjust_add_addr_timeout(msk); - if (!timeout) + if (!timeout || mptcp_pm_should_add_signal_addr(msk)) goto out; spin_lock_bh(&msk->pm.lock); From 464747eae483e9ac9185012b85a7619f06721587 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:56 +0200 Subject: [PATCH 504/554] mptcp: pm: ADD_ADDR rtx: return early if no retrans commit 62a9b19dce77e72426f049fb99b9d1d032b9a8ea upstream. No need to iterate over all subflows if there is no retransmission needed. Exit early in this case then.
Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-8-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 850de47fdae00..8523d3af3ca70 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -311,6 +311,9 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; unsigned int max = 0, max_stale = 0; + if (!rto) + return 0; + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); From 060029ed542e845fe71e36b46f15619dd255bddf Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Wed, 18 Mar 2026 15:32:53 +0800 Subject: [PATCH 505/554] f2fs: add READ_ONCE() for i_blocks in f2fs_update_inode() commit 5471834a96fb697874be2ca0b052e74bcf3c23d1 upstream. f2fs_update_inode() reads inode->i_blocks without holding i_lock to serialize it to the on-disk inode, while concurrent truncate or allocation paths may modify i_blocks under i_lock. Since blkcnt_t is u64, this risks torn reads on 32-bit architectures. Following the approach in ext4_inode_blocks_set(), add READ_ONCE() to prevent potential compiler-induced tearing. 
Fixes: 19f99cee206c ("f2fs: add core inode operations") Cc: stable@vger.kernel.org Signed-off-by: Cen Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 8c4eafe9ffac0..49470f4c9362a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -677,7 +677,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); - ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1); if (!f2fs_is_atomic_file(inode) || is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) From 63c2d2ad40d1563f7307023b2f0287a711521e3b Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 23 Mar 2026 20:06:24 +0800 Subject: [PATCH 506/554] f2fs: fix fiemap boundary handling when read extent cache is incomplete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 95e159ad3e52f7478cfd22e44ec37c9f334f8993 upstream. f2fs_fiemap() calls f2fs_map_blocks() to obtain the block mapping of a file, and then merges contiguous mappings into extents. If the mapping is found in the read extent cache, node blocks do not need to be read.
However, in the following scenario, a contiguous extent can be split into two extents: $ dd if=/dev/zero of=data.128M bs=1M count=128 $ losetup -f data.128M $ mkfs.f2fs /dev/loop0 -f $ mount -o mode=lfs /dev/loop0 /mnt/f2fs/ $ cd /mnt/f2fs/ $ dd if=/dev/zero of=data.72M bs=1M count=72 && sync $ dd if=/dev/zero of=data.4M bs=1M count=4 && sync $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=2 conv=notrunc && sync $ echo 3 > /proc/sys/vm/drop_caches $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync $ f2fs_io fiemap 0 1024 data.4M Fiemap: offset = 0 len = 1024 logical addr. physical addr. length flags 0 0000000000000000 0000000006400000 0000000000200000 00001000 1 0000000000200000 0000000006600000 0000000000200000 00001001 Although the physical addresses of the ranges 0~2MB and 2M~4MB are contiguous, the mapping for the 2M~4MB range is not present in memory. When the physical addresses for the 0~2MB range are updated, no merge happens because the adjacent mapping is missing from the in-memory cache. As a result, fiemap reports two separate extents instead of a single contiguous one. The root cause is that the read extent cache does not guarantee that all blocks of an extent are present in memory. Therefore, when the extent length returned by f2fs_map_blocks_cached() is smaller than maxblocks, the remaining mappings are retrieved via f2fs_get_dnode_of_data() to ensure correct fiemap extent boundary handling. 
Cc: stable@kernel.org Fixes: cd8fc5226bef ("f2fs: remove the create argument to f2fs_map_blocks") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dbbe134257b23..cfdd7bbd7f359 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1567,8 +1567,26 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && map->m_may_create); - if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) - goto out; + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) { + struct extent_info ei; + + /* + * 1. If map->m_multidev_dio is true, map->m_pblk cannot be + * waitted by f2fs_wait_on_block_writeback_range() and are not + * mergeable. + * 2. If pgofs hits the read extent cache, it means the mapping + * is already cached in the extent cache, but it is not + * mergeable, and there is no need to query the mapping again + * via f2fs_get_dnode_of_data(). 
+ */ + pgofs = (pgoff_t)map->m_lblk + map->m_len; + if (map->m_len == maxblocks || + map->m_multidev_dio || + f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) + goto out; + ofs = map->m_len; + goto map_more; + } map->m_bdev = inode->i_sb->s_bdev; map->m_multidev_dio = @@ -1579,7 +1597,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; - end = pgofs + maxblocks; +map_more: + end = (pgoff_t)map->m_lblk + maxblocks; if (flag == F2FS_GET_BLOCK_PRECACHE) mode = LOOKUP_NODE_RA; From bedb710b63ae1bd617e65d0a8cf6cea1200b3753 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 10 Mar 2026 17:36:12 +0800 Subject: [PATCH 507/554] f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage commit 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc upstream. f2fs_need_dentry_mark() reads nat_entry flags without mutual exclusion with the checkpoint path, which can result in an incorrect inode block marking state. The scenario is as follows: create & write & fsync 'file A' write checkpoint - f2fs_do_sync_file // inline inode - f2fs_write_inode // inode folio is dirty - f2fs_write_checkpoint - f2fs_flush_merged_writes - f2fs_sync_node_pages - f2fs_fsync_node_pages // no dirty node - f2fs_need_inode_block_update // return true - f2fs_fsync_node_pages // inode dirtied - f2fs_need_dentry_mark //return true - f2fs_flush_nat_entries - f2fs_write_checkpoint end - __write_node_folio // inode with DENT_BIT_SHIFT set SPO, "fsck --dry-run" find inode has already checkpointed but still with DENT_BIT_SHIFT set The state observed by f2fs_need_dentry_mark() can differ from the state observed in __write_node_folio() after acquiring sbi->node_write. The root cause is that the semantics of IS_CHECKPOINTED and HAS_FSYNCED_INODE are only guaranteed after the checkpoint write has fully completed. 
This patch moves set_dentry_mark() into __write_node_folio() and protects it with the sbi->node_write lock. Cc: stable@kernel.org Fixes: 88bd02c9472a ("f2fs: fix conditions to remain recovery information in f2fs_sync_file") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 591fcdf3ba77b..e14f609e16d59 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1780,13 +1780,12 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted goto redirty_out; } - if (atomic) { - if (!test_opt(sbi, NOBARRIER)) - fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - if (IS_INODE(folio)) - set_dentry_mark(folio, + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + + if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino_of_node(folio))); - } /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(sbi, folio)) { @@ -1927,9 +1926,6 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); - if (!atomic) - set_dentry_mark(folio, - f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ if (!folio_test_dirty(folio)) From f37012cc7c270a8a434240bb2e627df8be13e5af Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 3 Feb 2026 21:36:35 +0800 Subject: [PATCH 508/554] f2fs: fix incorrect file address mapping when inline inode is unwritten commit 68a0178981a0f493295afa29f8880246e561494c upstream. When `fileinfo->fi_flags` does not have the `FIEMAP_FLAG_SYNC` bit set and inline data has not been persisted yet, the physical address of the extent is calculated incorrectly for unwritten inline inodes. 
root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 root@vm:/mnt/f2fs# f2fs_io fiemap 0 100 data.3k Fiemap: offset = 0 len = 100 logical addr. physical addr. length flags 0 0000000000000000 00000ffffffff16c 0000000000000c00 00000301 This patch fixes the issue by checking if the inode's address is valid. If the inline inode is unwritten, set the physical address to 0 and mark the extent with `FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC` flags. Cc: stable@kernel.org Fixes: 67f8cf3cee6f ("f2fs: support fiemap for inline_data") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/inline.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 58ac831ef704e..e483584c8ded4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -790,7 +790,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, int f2fs_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { - __u64 byteaddr, ilen; + __u64 byteaddr = 0, ilen; __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_LAST; struct node_info ni; @@ -823,9 +823,14 @@ int f2fs_inline_data_fiemap(struct inode *inode, if (err) goto out; - byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(ifolio); + if (__is_valid_data_blkaddr(ni.blk_addr)) { + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; + byteaddr += (char *)inline_data_addr(inode, ifolio) - + (char *)F2FS_INODE(ifolio); + } else { + f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR); + flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN; + } err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: From d438a9fb20d01160cb37314d47faa6d6314c47f3 Mon 
Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 23 Mar 2026 20:06:22 +0800 Subject: [PATCH 509/554] f2fs: fix incorrect multidevice info in trace_f2fs_map_blocks() commit eb2ca3ca983551a80e16a4a25df5a4ce59df8484 upstream. When f2fs_map_blocks()->f2fs_map_blocks_cached() hits the read extent cache, map->m_multidev_dio is not updated, which leads to incorrect multidevice information being reported by trace_f2fs_map_blocks(). This patch updates map->m_multidev_dio in f2fs_map_blocks_cached() when the read extent cache is hit. Cc: stable@kernel.org Fixes: 0094e98bd147 ("f2fs: factor a f2fs_map_blocks_cached helper") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cfdd7bbd7f359..f468cba89f303 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1508,7 +1508,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode, f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - if (f2fs_allow_multi_device_dio(sbi, flag)) { + map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag); + if (map->m_multidev_dio) { int bidx = f2fs_target_device_index(sbi, map->m_pblk); struct f2fs_dev_info *dev = &sbi->devs[bidx]; From b0e4395870eb3441ddc959f6710b5f6ca61aff26 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 3 Apr 2026 22:40:17 +0800 Subject: [PATCH 510/554] f2fs: fix node_cnt race between extent node destroy and writeback commit ed78aeebef05212ef7dca93bd931e4eff67c113f upstream. f2fs_destroy_extent_node() does not set FI_NO_EXTENT before clearing extent nodes. When called from f2fs_drop_inode() with I_SYNC set, concurrent kworker writeback can insert new extent nodes into the same extent tree, racing with the destroy and triggering f2fs_bug_on() in __destroy_extent_node(). 
The scenario is as follows: drop inode writeback - iput - f2fs_drop_inode // I_SYNC set - f2fs_destroy_extent_node - __destroy_extent_node - while (node_cnt) { write_lock(&et->lock) __free_extent_tree write_unlock(&et->lock) - __writeback_single_inode - f2fs_outplace_write_data - f2fs_update_read_extent_cache - __update_extent_tree_range // FI_NO_EXTENT not set, // insert new extent node } // node_cnt == 0, exit while - f2fs_bug_on(node_cnt) // node_cnt > 0 Additionally, __update_extent_tree_range() only checks FI_NO_EXTENT for EX_READ type, leaving EX_BLOCK_AGE updates completely unprotected. This patch sets FI_NO_EXTENT under et->lock in __destroy_extent_node(), consistent with other callers (__update_extent_tree_range and __drop_extent_tree) and checks FI_NO_EXTENT for both EX_READ and EX_BLOCK_AGE tree. Fixes: 3fc5d5a182f6 ("f2fs: fix to shrink read extent node in batches") Cc: stable@vger.kernel.org Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/extent_cache.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0ed84cc065a7e..87169fd29d897 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (!__init_may_extent_tree(inode, type)) return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; @@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode, while (atomic_read(&et->node_cnt)) { write_lock(&et->lock); + if (!is_inode_flag_set(inode, FI_NO_EXTENT)) + set_inode_flag(inode, FI_NO_EXTENT); node_cnt += __free_extent_tree(sbi, et, nr_shrink); write_unlock(&et->lock); 
} @@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + if (type == EX_READ) { prev = et->largest; dei.len = 0; From 8d269aae6f73683b47dc1039d38f7dc73ce3219c Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Fri, 10 Apr 2026 20:47:26 +0800 Subject: [PATCH 511/554] f2fs: fix uninitialized kobject put in f2fs_init_sysfs() commit b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 upstream. In f2fs_init_sysfs(), all failure paths after kset_register() jump to put_kobject, which unconditionally releases both f2fs_tune and f2fs_feat. If kobject_init_and_add(&f2fs_feat, ...) fails, f2fs_tune has not been initialized yet, so calling kobject_put(&f2fs_tune) is invalid. Fix this by splitting the unwind path so each error path only releases objects that were successfully initialized. 
Fixes: a907f3a68ee26ba4 ("f2fs: add a sysfs entry to reclaim POSIX_FADV_NOREUSE pages") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/sysfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c6fa8f74b91c2..a75c3ef300bd2 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1935,24 +1935,26 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); if (ret) - goto put_kobject; + goto unregister_kset; ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, NULL, "tuning"); if (ret) - goto put_kobject; + goto put_feat; f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); if (!f2fs_proc_root) { ret = -ENOMEM; - goto put_kobject; + goto put_tune; } return 0; -put_kobject: +put_tune: kobject_put(&f2fs_tune); +put_feat: kobject_put(&f2fs_feat); +unregister_kset: kset_unregister(&f2fs_kset); return ret; } From 4bdc05b2032bafb6e652a1fc867e0f15d851e79e Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:45:32 +0800 Subject: [PATCH 512/554] f2fs: refactor f2fs_move_node_folio function commit 92c20989366e023b74fa0c1028af9436c1917dbf upstream. This patch refactors the f2fs_move_node_folio() function. No logical changes. 
Cc: stable@kernel.org Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 54 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e14f609e16d59..1c7465ab49b5e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1821,41 +1821,51 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted return false; } -int f2fs_move_node_folio(struct folio *node_folio, int gc_type) +static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type) { int err = 0; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + }; - if (gc_type == FG_GC) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; + if (!sync_mode) { + /* set page dirty and write it */ + if (!folio_test_writeback(node_folio)) + folio_mark_dirty(node_folio); + goto out_folio; + } - f2fs_folio_wait_writeback(node_folio, NODE, true, true); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + if (mark_dirty) folio_mark_dirty(node_folio); + else if (!folio_test_dirty(node_folio)) + goto out_folio; - if (!folio_clear_dirty_for_io(node_folio)) { - err = -EAGAIN; - goto out_page; - } - - if (!__write_node_folio(node_folio, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) - err = -EAGAIN; - goto release_page; - } else { - /* set page dirty and write it */ - if (!folio_test_writeback(node_folio)) - folio_mark_dirty(node_folio); + if (!folio_clear_dirty_for_io(node_folio)) { + err = -EAGAIN; + goto out_folio; } -out_page: + + if (!__write_node_folio(node_folio, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) + err = -EAGAIN; + goto release_folio; +out_folio: folio_unlock(node_folio); -release_page: +release_folio: f2fs_folio_put(node_folio, false); return err; } +int 
f2fs_move_node_folio(struct folio *node_folio, int gc_type) +{ + return f2fs_write_single_node_folio(node_folio, gc_type == FG_GC, + true, FS_GC_NODE_IO); +} + int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id) From 03806ec9c4cd5405780e127ab3eb86e724f1b151 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:46:35 +0800 Subject: [PATCH 513/554] f2fs: fix inline data not being written to disk in writeback path commit fe9b8b30b97102859a9102be7bd2a09803bd90bd upstream. When f2fs_fiemap() is called with `fileinfo->fi_flags` containing the FIEMAP_FLAG_SYNC flag, it attempts to write data to disk before retrieving file mappings via filemap_write_and_wait(). However, there is an issue where the file does not get mapped as expected. The following scenario can occur: root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 root@vm:/mnt/f2fs# xfs_io data.3k -c "fiemap -v 0 4096" data.3k: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..5]: 0..5 6 0x307 The root cause of this issue is that f2fs_write_single_data_page() only calls f2fs_write_inline_data() to copy data from the data folio to the inode folio, and it clears the dirty flag on the data folio. However, it does not mark the data folio as writeback. When __filemap_fdatawait_range() checks for folios with the writeback flag, it returns early, causing f2fs_fiemap() to report that the file has no mapping. To fix this issue, the solution is to call f2fs_write_single_node_folio() in f2fs_inline_data_fiemap() when getting fiemap with FIEMAP_FLAG_SYNC flags. This patch ensures that the inode folio is written back and the writeback process completes before proceeding. 
Cc: stable@kernel.org Fixes: 9ffe0fb5f3bb ("f2fs: handle inline data operations") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 9 +++++++++ fs/f2fs/node.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 77ff143624f9c..939bd1fb57070 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3888,6 +3888,8 @@ int f2fs_sanity_check_node_footer(struct f2fs_sb_info *sbi, enum node_type ntype, bool in_irq); struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type); int f2fs_move_node_folio(struct folio *node_folio, int gc_type); void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e483584c8ded4..ea5680406e23d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -812,6 +812,15 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; } + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO); + if (err) + return err; + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); + } ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); if (start >= ilen) goto out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1c7465ab49b5e..e136c8b16d29e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1821,7 +1821,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted return false; } -static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, +int 
f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, bool mark_dirty, enum iostat_type io_type) { int err = 0; From 8be551f538dc5b64183e27bd45a7a0795263f760 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:45:34 +0800 Subject: [PATCH 514/554] f2fs: fix fsck inconsistency caused by FGGC of node block commit c3e238bd1f56993f205ef83889d406dfeaf717a8 upstream. During FGGC node block migration, fsck may incorrectly treat the migrated node block as fsync-written data. The reproduction scenario: root@vm:/mnt/f2fs# seq 1 2048 | xargs -n 1 ./test_sync // write inline inode and sync root@vm:/mnt/f2fs# rm -f 1 root@vm:/mnt/f2fs# sync root@vm:/mnt/f2fs# f2fs_io gc_range // move data block in sync mode and not write CP SPO, "fsck --dry-run" finds inode has already checkpointed but still with DENT_BIT_SHIFT set The root cause is that GC does not clear the dentry mark and fsync mark during node block migration, leading fsck to misinterpret them as user-issued fsync writes. In BGGC mode, node block migration is handled by f2fs_sync_node_pages(), which guarantees the dentry and fsync marks are cleared before writing. This patch moves the set/clear of the fsync|dentry marks into __write_node_folio to make the logic clearer, and ensures the fsync|dentry mark is cleared in FGGC. 
Cc: stable@kernel.org Fixes: da011cc0da8c ("f2fs: move node pages only in victim section during GC") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e136c8b16d29e..ca3dad7418b26 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1709,9 +1709,10 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return last_folio; } -static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, - struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type, unsigned int *seq_id) +static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, + bool *submitted, struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type, + unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); nid_t nid; @@ -1783,6 +1784,8 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + set_dentry_mark(folio, false); + set_fsync_mark(folio, do_fsync); if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino_of_node(folio))); @@ -1849,7 +1852,7 @@ int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, goto out_folio; } - if (!__write_node_folio(node_folio, false, NULL, + if (!__write_node_folio(node_folio, false, false, NULL, &wbc, false, FS_GC_NODE_IO, NULL)) err = -EAGAIN; goto release_folio; @@ -1896,6 +1899,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; bool submitted = false; + bool do_fsync = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_folio_put(last_folio, false); @@ -1926,11 +1930,8 @@ int 
f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - if (!atomic || folio == last_folio) { - set_fsync_mark(folio, 1); + do_fsync = true; percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(folio)) { if (is_inode_flag_set(inode, @@ -1947,8 +1948,9 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!__write_node_folio(folio, atomic && folio == last_folio, - &submitted, wbc, true, - FS_NODE_IO, seq_id)) { + do_fsync, &submitted, + wbc, true, FS_NODE_IO, + seq_id)) { f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); ret = -EIO; @@ -2148,10 +2150,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - - if (!__write_node_folio(folio, false, &submitted, + if (!__write_node_folio(folio, false, false, &submitted, wbc, do_balance, io_type, NULL)) { folio_batch_release(&fbatch); ret = -EIO; From 837db077779280ec417b8d79832387f340380bd2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Apr 2026 17:36:07 +0100 Subject: [PATCH 515/554] KVM: arm64: Wake-up from WFI when irqchip is in userspace commit 4ce98bf0865c349e7026ad9c14f48da264920953 upstream. It appears that there is nothing in the wake-up path that evaluates whether the in-kernel interrupts are pending unless we have a vgic. This means that the userspace irqchip support has been broken for about four years, and nobody noticed. It was also broken before as we wouldn't wake-up on a PMU interrupt, but hey, who cares... It is probably time to remove the feature altogether, because it was a terrible idea 10 years ago, and it still is. 
Fixes: b57de4ffd7c6d ("KVM: arm64: Simplify kvm_cpu_has_pending_timer()") Link: https://patch.msgid.link/20260423163607.486345-1-maz@kernel.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 052bf0d4d0b03..afc77977d4b94 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -755,6 +755,10 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + irq_lines |= (!irqchip_in_kernel(v->kvm) && + (kvm_timer_should_notify_user(v) || + kvm_pmu_should_notify_user(v))); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } From 7b99369b2c7a553f67eed3967d6486df8bbac521 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 7 Apr 2026 21:27:02 +0100 Subject: [PATCH 516/554] KVM: arm64: vgic: Fix IIDR revision field extracted from wrong value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a0e6ae45af17e8b27958830595799c702ffbab8d upstream. The uaccess write handlers for GICD_IIDR in both GICv2 and GICv3 extract the revision field from 'reg' (the current IIDR value read back from the emulated distributor) instead of 'val' (the value userspace is trying to write). This means userspace can never actually change the implementation revision — the extracted value is always the current one. Fix the FIELD_GET to use 'val' so that userspace can select a different revision for migration compatibility. 
Fixes: 49a1a2c70a7f ("KVM: arm64: vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") Signed-off-by: David Woodhouse Link: https://patch.msgid.link/20260407210949.2076251-2-dwmw2@infradead.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index f25fccb1f8e63..3307c5ffcaaca 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -91,7 +91,7 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, * migration from old kernels to new kernels with legacy * userspace. */ - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 70d50c77e5dc7..051d9bf73879c 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) return -EINVAL; - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: From 81e7907fe542fbbf11a33484e41ff401e0a74724 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 24 Apr 2026 09:49:08 +0100 Subject: [PATCH 517/554] KVM: arm64: Fix initialisation order in __pkvm_init_finalise() commit 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 upstream. fix_host_ownership() walks the hypervisor's stage-1 page-table to adjust the host's stage-2 accordingly. Any such adjustment that requires cache maintenance operations depends on the per-CPU hyp fixmap being present. 
However, fix_host_ownership() is currently called before fix_hyp_pgtable_refcnt() and hyp_create_fixmap(), so the fixmap does not yet exist when it runs. This is benign today because the host stage-2 starts empty and no CMOs are needed, but it becomes a latent crash as soon as fix_host_ownership() is extended to operate on a non-empty page-table. Reorder the calls so that fix_hyp_pgtable_refcnt() and hyp_create_fixmap() complete before fix_host_ownership() is invoked. Fixes: 0d16d12eb26e ("KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2") Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-7-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/nvhe/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 90bd014e952fb..97643fc02d925 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -312,15 +312,15 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = fix_host_ownership(); + ret = fix_hyp_pgtable_refcnt(); if (ret) goto out; - ret = fix_hyp_pgtable_refcnt(); + ret = hyp_create_fixmap(); if (ret) goto out; - ret = hyp_create_fixmap(); + ret = fix_host_ownership(); if (ret) goto out; From 7e9f5f62f8d152957e03dde5ca4586377df8ac11 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:05 +0100 Subject: [PATCH 518/554] KVM: arm64: Fix FEAT_SPE_FnE to use PMSIDR_EL1.FnE, not PMSVer commit 08d715338287a1affb4c7ad5733decef4558a5c8 upstream. FEAT_SPE_FnE is architecturally detected via PMSIDR_EL1.FnE [6], not ID_AA64DFR0_EL1.PMSVer. 
The FEAT_X macro form (register, field, value) cannot encode a PMSIDR_EL1-based feature, so FEAT_SPE_FnE was defined identically to FEAT_SPEv1p2 (ID_AA64DFR0_EL1, PMSVer, V1P2), producing a duplicate that used PMSVer >= V1P2 as a proxy. Replace the macro with feat_spe_fne(), following the same pattern as the sibling feat_spe_fds(): guard on FEAT_SPEv1p2 and read PMSIDR_EL1.FnE [6] directly. Wire the two NEEDS_FEAT consumers to use the new function. Remove the now-unused FEAT_SPE_FnE macro. Fixes: 63d423a7635b ("KVM: arm64: Switch to table-driven FGU configuration") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-4-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/config.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 24bb3f36e9d59..b6ac8832ee4cd 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -127,7 +127,6 @@ struct reg_feat_map_desc { } #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP -#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP #define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP #define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP @@ -294,6 +293,16 @@ static bool feat_spe_fds(struct kvm *kvm) (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS)); } +static bool feat_spe_fne(struct kvm *kvm) +{ + /* + * Revisit this if KVM ever supports SPE -- this really should + * look at the guest's view of PMSIDR_EL1. 
+ */ + return (kvm_has_feat(kvm, FEAT_SPEv1p2) && + (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FnE)); +} + static bool feat_trbe_mpam(struct kvm *kvm) { /* @@ -547,7 +556,7 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { HDFGRTR_EL2_PMBPTR_EL1 | HDFGRTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA | HDFGRTR_EL2_nBRBCTL | HDFGRTR_EL2_nBRBIDR, @@ -615,7 +624,7 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { HDFGWTR_EL2_PMBPTR_EL1 | HDFGWTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA | HDFGWTR_EL2_nBRBCTL, FEAT_BRBE), From 931c9c777e35d2f1871f6a6fa5dd049e08df7ade Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:03 +0100 Subject: [PATCH 519/554] KVM: arm64: Fix FEAT_Debugv8p9 to check DebugVer, not PMUVer commit 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 upstream. FEAT_Debugv8p9 is incorrectly defined against ID_AA64DFR0_EL1.PMUVer instead of ID_AA64DFR0_EL1.DebugVer. All three consumers of the macro gate features that are architecturally tied to FEAT_Debugv8p9 (DebugVer = 0b1011, DDI0487 M.b A2.2.10): - HDFGRTR2_EL2.nMDSELR_EL1, HDFGWTR2_EL2.nMDSELR_EL1: MDSELR_EL1 is present only when FEAT_Debugv8p9 is implemented (D24.3.21). - MDCR_EL2.EBWE: the Extended Breakpoint and Watchpoint Enable bit is RES0 unless FEAT_Debugv8p9 is implemented (D24.3.17). Neither register has any dependency on PMUVer. FEAT_Debugv8p9 and FEAT_PMUv3p9 are independent. Per DDI0487 M.b A2.2.10, FEAT_Debugv8p9 is unconditionally mandatory from Armv8.9, whereas FEAT_PMUv3p9 is mandatory only when FEAT_PMUv3 is implemented. 
An Armv8.9 CPU without a PMU has DebugVer = 0b1011 but PMUVer = 0b0000, so the wrong field check would cause KVM to incorrectly treat EBWE and MDSELR_EL1 as RES0 on such hardware. Fixes: 4bc0fe089840 ("KVM: arm64: Add sanitisation for FEAT_FGT2 registers") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-2-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index b6ac8832ee4cd..cd3e34389c08d 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -187,7 +187,7 @@ struct reg_feat_map_desc { #define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP #define FEAT_PoPS ID_AA64MMFR4_EL1, PoPS, IMP #define FEAT_PFAR ID_AA64PFR1_EL1, PFAR, IMP -#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, PMUVer, V3P9 +#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, DebugVer, V8P9 #define FEAT_PMUv3_SS ID_AA64DFR0_EL1, PMSS, IMP #define FEAT_SEBEP ID_AA64DFR0_EL1, SEBEP, IMP #define FEAT_EBEP ID_AA64DFR1_EL1, EBEP, IMP From 7d3c27b54253cda91dc4d2c1bfc109c490837ab9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:07 +0100 Subject: [PATCH 520/554] KVM: arm64: Fix pin leak and publication ordering in __pkvm_init_vcpu() commit 73b9c1e5da84cd69b1a86e374e450817cd051371 upstream. Two bugs exist in the vCPU initialisation path: 1. If a check fails after hyp_pin_shared_mem() succeeds, the cleanup path jumps to 'unlock' without calling unpin_host_vcpu() or unpin_host_sve_state(), permanently leaking pin references on the host vCPU and SVE state pages. Extract a register_hyp_vcpu() helper that performs the checks and the store. When register_hyp_vcpu() returns an error, call unpin_host_vcpu() and unpin_host_sve_state() inline before falling through to the existing 'unlock' label. 2. 
register_hyp_vcpu() publishes the new vCPU pointer into 'hyp_vm->vcpus[]' with a bare store, allowing a concurrent caller of pkvm_load_hyp_vcpu() to observe a partially initialised vCPU object. Ensure the store uses smp_store_release() and the load uses smp_load_acquire(). While 'vm_table_lock' currently serialises the store and the load, these barriers ensure the reader sees the fully initialised 'hyp_vcpu' object even if there were a lockless path or if the lock's own ordering guarantees were insufficient for nested object initialization. Fixes: 49af6ddb8e5c ("KVM: arm64: Add infrastructure to create and track pKVM instances at EL2") Reported-by: Ben Simner Co-developed-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-6-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 38 ++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index d866f6ba19b5f..cafd09565f83b 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -259,7 +259,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx) goto unlock; - hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + /* Pairs with smp_store_release() in register_hyp_vcpu(). */ + hyp_vcpu = smp_load_acquire(&hyp_vm->vcpus[vcpu_idx]); if (!hyp_vcpu) goto unlock; @@ -801,12 +802,30 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, * the page-aligned size of 'struct pkvm_hyp_vcpu'. * Return 0 on success, negative error code on failure. 
*/ +static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm, + struct pkvm_hyp_vcpu *hyp_vcpu) +{ + unsigned int idx = hyp_vcpu->vcpu.vcpu_idx; + + if (idx >= hyp_vm->kvm.created_vcpus) + return -EINVAL; + + if (hyp_vm->vcpus[idx]) + return -EINVAL; + + /* + * Ensure the hyp_vcpu is initialised before publishing it to + * the vCPU-load path via 'hyp_vm->vcpus[]'. + */ + smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu); + return 0; +} + int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unsigned long vcpu_hva) { struct pkvm_hyp_vcpu *hyp_vcpu; struct pkvm_hyp_vm *hyp_vm; - unsigned int idx; int ret; hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); @@ -825,18 +844,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, if (ret) goto unlock; - idx = hyp_vcpu->vcpu.vcpu_idx; - if (idx >= hyp_vm->kvm.created_vcpus) { - ret = -EINVAL; - goto unlock; - } - - if (hyp_vm->vcpus[idx]) { - ret = -EINVAL; - goto unlock; + ret = register_hyp_vcpu(hyp_vm, hyp_vcpu); + if (ret) { + unpin_host_vcpu(host_vcpu); + unpin_host_sve_state(hyp_vcpu); } - - hyp_vm->vcpus[idx] = hyp_vcpu; unlock: hyp_spin_unlock(&vm_table_lock); From 9e1aed63a5552958ef2a9bfd699a3f990e52a77f Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Mon, 4 May 2026 09:00:20 +0800 Subject: [PATCH 521/554] LoongArch: Fix potential ADE in loongson_gpu_fixup_dma_hang() commit 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e upstream. The switch case in loongson_gpu_fixup_dma_hang() may not DC2 or DC3, and readl(crtc_reg) will access with random address, because the "device" is from "base+PCI_DEVICE_ID", "base" is from "pdev->devfn+1". This is wrong when my platform inserts a discrete GPU: lspci -tv -[0000:00]-+-00.0 Loongson Technology LLC Hyper Transport Bridge Controller ... +-06.0 Loongson Technology LLC LG100 GPU +-06.2 Loongson Technology LLC Device 7a37 ... 
Add a default switch case to fix the panic as below: Kernel ade access[#1]: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.136-loong64-desktop-hwe+ #4 pc 90000000017e5534 ra 90000000017e54c0 tp 90000001002f8000 sp 90000001002fb6c0 a0 80000efe00003100 a1 0000000000003100 a2 0000000000000000 a3 0000000000000002 a4 90000001002fb6b4 a5 900000087cdb58fd a6 90000000027af000 a7 0000000000000001 t0 00000000000085b9 t1 000000000000ffff t2 0000000000000000 t3 0000000000000000 t4 fffffffffffffffd t5 00000000fffb6d9c t6 0000000000083b00 t7 00000000000070c0 t8 900000087cdb4d94 u0 900000087cdb58fd s9 90000001002fb826 s0 90000000031c12c8 s1 7fffffffffffff00 s2 90000000031c12d0 s3 0000000000002710 s4 0000000000000000 s5 0000000000000000 s6 9000000100053000 s7 7fffffffffffff00 s8 90000000030d4000 ra: 90000000017e54c0 loongson_gpu_fixup_dma_hang+0x40/0x210 ERA: 90000000017e5534 loongson_gpu_fixup_dma_hang+0xb4/0x210 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 00480000 [ADEM] (IS= ECode=8 EsubCode=1) BADV: 7fffffffffffff00 PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) Modules linked in: Process swapper/0 (pid: 1, threadinfo=(____ptrval____), task=(____ptrval____)) Stack : 0000000000000006 90000001002fb778 90000001002fb704 0000000000000007 0000000016a65700 90000000017e5690 000000000000ffff ffffffffffffffff 900000000209f7c0 9000000100053000 900000000209f7a8 9000000000eebc08 0000000000000000 0000000000000000 0000000000000006 90000001002fb778 90000001000530b8 90000000027af000 0000000000000000 9000000100054000 9000000100053000 9000000000ebb70c 9000000100004c00 9000000004000001 90000001002fb7e4 bae765461f31cb12 0000000000000000 0000000000000000 0000000000000006 90000000027af000 0000000000000030 90000000027af000 900000087cd6f800 9000000100053000 0000000000000000 9000000000ebc560 7a2500147cdaf720 bae765461f31cb12 0000000000000001 0000000000000030 ... 
Call Trace: [<90000000017e5534>] loongson_gpu_fixup_dma_hang+0xb4/0x210 [<9000000000eebc08>] pci_fixup_device+0x108/0x280 [<9000000000ebb70c>] pci_setup_device+0x24c/0x690 [<9000000000ebc560>] pci_scan_single_device+0xe0/0x140 [<9000000000ebc684>] pci_scan_slot+0xc4/0x280 [<9000000000ebdd00>] pci_scan_child_bus_extend+0x60/0x3f0 [<9000000000f5bc94>] acpi_pci_root_create+0x2b4/0x420 [<90000000017e5e74>] pci_acpi_scan_root+0x2d4/0x440 [<9000000000f5b02c>] acpi_pci_root_add+0x21c/0x3a0 [<9000000000f4ee54>] acpi_bus_attach+0x1a4/0x3c0 [<90000000010e200c>] device_for_each_child+0x6c/0xe0 [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 [<90000000010e200c>] device_for_each_child+0x6c/0xe0 [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 [<9000000000f5211c>] acpi_bus_scan+0x6c/0x280 [<900000000189c028>] acpi_scan_init+0x194/0x310 [<900000000189bc6c>] acpi_init+0xcc/0x140 [<9000000000220cdc>] do_one_initcall+0x4c/0x310 [<90000000018618fc>] kernel_init_freeable+0x258/0x2d4 [<900000000184326c>] kernel_init+0x28/0x13c [<9000000000222008>] ret_from_kernel_thread+0xc/0xa4 Cc: stable@vger.kernel.org Fixes: 95db0c9f526d ("LoongArch: Workaround LS2K/LS7A GPU DMA hang bug") Link: https://gist.github.com/opsiff/ebf2dac51b4013d22462f2124c55f807 Link: https://gist.github.com/opsiff/a62f2a73db0492b3c49bf223a339b133 Signed-off-by: Wentao Guan Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d233ea2218fe0..f33c7ea1443d9 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -132,6 +132,9 @@ static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) crtc_reg = regbase; crtc_offset = 0x400; break; + default: + iounmap(regbase); + return; } for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { 
From e50d8573534fa3221a1ab4a3f8a864480c1ba93b Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Mon, 4 May 2026 09:00:37 +0800 Subject: [PATCH 522/554] LoongArch: KVM: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS commit b3e31a6650d4cab63f0814c37c0b360372c6ee9e upstream. It doesn't make sense to return the recommended maximum number of vCPUs which exceeds the maximum possible number of vCPUs. Other architectures have already done this, such as commit 57a2e13ebdda ("KVM: MIPS: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS") Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Qiang Ma Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 85246a70d95d3..e86a32fffcddd 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -94,7 +94,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; From 1c32824fbdec3b0e165067cea0835e4546dcc87e Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 4 May 2026 09:00:37 +0800 Subject: [PATCH 523/554] LoongArch: KVM: Fix "unreliable stack" for kvm_exc_entry commit b323a441da602dfdfc24f30d3190cac786ffebf2 upstream. Insert the appropriate UNWIND hint into the kvm_exc_entry assembly function to guide the generation of correct ORC table entries, thereby solving the timeout problem ("unreliable stack") while loading the livepatch-sample module on a physical machine running virtual machines with multiple vcpus. 
Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/switch.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index f1768b7a61949..9eaad5dbed910 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -104,7 +104,7 @@ .text .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) - UNWIND_HINT_UNDEFINED + UNWIND_HINT_END_OF_STACK csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH From b715ceb72ed99ca1e88a33e8ad451e28bda707c2 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 May 2026 09:00:48 +0800 Subject: [PATCH 524/554] LoongArch: KVM: Fix HW timer interrupt lost when inject interrupt by software commit 2433f3f5724b3af569d9fb411ba728629524738b upstream. With passthrough HW timer, timer interrupt is injected by HW. When inject emulated CPU interrupt by software such SIP0/SIP1/IPI, HW timer interrupt may be lost. Here check whether there is timer tick value inversion before and after injecting emulated CPU interrupt by software, timer enabling by reading timer cfg register is skipped. If the timer tick value is detected with changing, then timer should be enabled. And inject a timer interrupt by software if there is. Cc: Fixes: f45ad5b8aa93 ("LoongArch: KVM: Implement vcpu interrupt operations"). 
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/interrupt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c index 8462083f03017..20f3e2072d52c 100644 --- a/arch/loongarch/kvm/interrupt.c +++ b/arch/loongarch/kvm/interrupt.c @@ -26,6 +26,7 @@ static unsigned int priority_to_irq[EXCCODE_INT_NUM] = { static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_pending); if (priority < EXCCODE_INT_NUM) @@ -36,7 +37,13 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); set_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7: @@ -53,6 +60,7 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_clear); if (priority < EXCCODE_INT_NUM) @@ -63,7 +71,13 @@ static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); clear_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7: From 3d44ed6dd219f1a3ec39c8897bf8da3b1664d704 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 May 2026 09:00:48 +0800 Subject: [PATCH 525/554] LoongArch: KVM: Move unconditional delay into timer clear scenery commit 5a873d77ba792410a796595a917be6a440f9b7d2 upstream. 
When timer interrupt arrives in guest kernel, guest kernel clears the timer interrupt and program timer with the next incoming event. During this stage, timer tick is -1 and timer interrupt status is disabled in ESTAT register. KVM hypervisor need write zero with timer tick register and wait timer interrupt injection from HW side, and then clear timer interrupt. So there is 2 cycle delay in KVM hypervisor to emulate such scenery, and the delay is unnecessary if there is no need to clear the timer interrupt. Here move 2 cycle delay into timer clear scenery and add timer ESTAT checking after delay, and set max timer expire value if timer interrupt does not arrive still. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/timer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 29c2aaba63c33..8356fce0043f6 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -96,15 +96,21 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); - __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear * timer interrupt, and CSR TVAL keeps unchanged with -1, it * avoids spurious timer interrupt */ - if (!(estat & CPU_TIMER)) + if (!(estat & CPU_TIMER)) { + __delay(2); /* Wait cycles until timer interrupt injected */ + + /* Write TVAL with max value if no TI shot */ + estat = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT); + if (!(estat & CPU_TIMER)) + write_gcsr_timertick(CSR_TCFG_VAL); gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + } return; } From aaaf41aee5ac69741f170758a3ed5454557b2b45 Mon Sep 17 00:00:00 2001 From: Tao Cui Date: Mon, 4 May 2026 09:00:38 +0800 Subject: [PATCH 526/554] LoongArch: KVM: Use kvm_set_pte() in kvm_flush_pte() commit 
81e18777d61440511451866c7c80b34a8bdd6b33 upstream. kvm_flush_pte() is the only caller that directly assigns *pte instead of using the kvm_set_pte() wrapper. Use the wrapper for consistency with the rest of the file. No functional change intended. Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Tao Cui Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a7fa458e33605..e104897aa5328 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -95,7 +95,7 @@ static int kvm_flush_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx) else kvm->stat.pages--; - *pte = ctx->invalid_entry; + kvm_set_pte(pte, ctx->invalid_entry); return 1; } From 23900db3787a614f5aef31cabe96d54b7d4a0c3a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 May 2026 09:00:20 +0800 Subject: [PATCH 527/554] LoongArch: Use per-root-bridge PCIH flag to skip mem resource fixup commit 49f33840dcc907d21313d369e34872880846b61c upstream. When firmware enables 64-bit PCI host bridge support, some root bridges already provide valid 64-bit mem resource windows through ACPI. In this case, the LoongArch-specific mem resource high-bits fixup in acpi_prepare_root_resources() should not be applied unconditionally. Otherwise, the kernel may override the native resource layout derived from firmware, and later BAR assignment can fail to place device BARs into the intended 64-bit address space correctly. Add a per-root-bridge ACPI flag, PCIH, and evaluate it from the current root bridge device scope. When PCIH is set, skip the mem resource high- bits fixup path and let the kernel use the firmware-provided resource description directly. When PCIH is absent or cleared, keep the existing behavior and continue filling the high address bits from the host bridge address. 
This makes the behavior per-root-bridge configurable and avoids breaking valid 64-bit BAR space allocation on bridges whose 64-bit windows have already been fully described by firmware. Cc: stable@vger.kernel.org Suggested-by: Chao Li Tested-by: Dongyan Qian Signed-off-by: Dongyan Qian Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/pci/acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 50c9016641a48..a6d869e103c14 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -61,11 +61,16 @@ static void acpi_release_root_info(struct acpi_pci_root_info *ci) static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) { int status; + unsigned long long pci_h = 0; struct resource_entry *entry, *tmp; struct acpi_device *device = ci->bridge; status = acpi_pci_probe_root_resources(ci); if (status > 0) { + acpi_evaluate_integer(device->handle, "PCIH", NULL, &pci_h); + if (pci_h) + return status; + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { if (entry->res->flags & IORESOURCE_MEM) { entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40); From 2563d7880c4df175e59f13c5881d99f8b51e3e70 Mon Sep 17 00:00:00 2001 From: Martin Michaelis Date: Thu, 23 Apr 2026 15:54:11 -0600 Subject: [PATCH 528/554] io_uring/kbuf: support min length left for incremental buffers Commit 7deba791ad495ce1d7921683f4f7d1190fa210d1 upstream. Incrementally consumed buffer rings are generally fully consumed, but it's quite possible that the application has a minimum size it needs to meet to avoid truncation. Currently that minimum limit is 1 byte, but this should be a setting that is the hands of the application. For recvmsg multishot, a prime use case for incrementally consumed buffers, the application may get spurious -EFAULT returned at the end of an incrementally consumed buffer, as less space is available than the headers need. 
Grab a u32 field in struct io_uring_buf_reg, which the application can use to inform the kernel of the minimum size that should be available in an incrementally consumed buffer. If less than that is available, the current buffer is fully processed and the next one will be picked. Cc: stable@vger.kernel.org Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Link: https://github.com/axboe/liburing/issues/1433 Signed-off-by: Martin Michaelis [axboe: write commit message, change io_buffer_list member name] Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/io_uring.h | 3 ++- io_uring/kbuf.c | 12 +++++++++--- io_uring/kbuf.h | 7 +++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b7c8dad266908..ef5b734b5a419 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -864,7 +864,8 @@ struct io_uring_buf_reg { __u32 ring_entries; __u16 bgid; __u16 flags; - __u64 resv[3]; + __u32 min_left; + __u32 resv[5]; }; /* argument for IORING_REGISTER_PBUF_STATUS */ diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 9e8166e24dc81..32d3b8d26bf00 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -47,9 +47,9 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) this_len = min_t(u32, len, buf_len); buf_len -= this_len; /* Stop looping for invalid buffer length of 0 */ - if (buf_len || !this_len) { - buf->addr = READ_ONCE(buf->addr) + this_len; - buf->len = buf_len; + if (buf_len > bl->min_left_sub_one || !this_len) { + WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len); + WRITE_ONCE(buf->len, buf_len); return false; } buf->len = 0; @@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; + /* minimum left byte count is a property of incremental buffers */ + if (!(reg.flags 
& IOU_PBUF_RING_INC) && reg.min_left) + return -EINVAL; + bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -684,6 +688,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) bl->mask = reg.ring_entries - 1; bl->flags |= IOBL_BUF_RING; bl->buf_ring = br; + if (reg.min_left) + bl->min_left_sub_one = reg.min_left - 1; if (reg.flags & IOU_PBUF_RING_INC) bl->flags |= IOBL_INC; ret = io_buffer_add_list(ctx, bl, reg.bgid); diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index ada382ff38d7e..d1d00e040dfe1 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -34,6 +34,13 @@ struct io_buffer_list { __u16 flags; + /* + * minimum required amount to be left to reuse an incrementally + * consumed buffer. If less than this is left at consumption time, + * buffer is done and head is incremented to the next buffer. + */ + __u32 min_left_sub_one; + struct io_mapped_region region; }; From 737adda64f5142dd809c2b3f1d227fe5bb6cfd32 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Apr 2026 19:16:39 -0600 Subject: [PATCH 529/554] io_uring/tw: serialize ctx->retry_llist with ->uring_lock Commit 17666e2d7592c3e85260cafd3950121524acc2c5 upstream. The DEFER_TASKRUN local task work paths all run under ctx->uring_lock, which serializes them with each other and with the rest of the ring's hot paths. io_move_task_work_from_local() is the exception - it's called from io_ring_exit_work() on a kworker without holding the lock and from the iopoll cancelation side right after dropping it. ->work_llist is fine with this, as it's only ever updated via the expected paths. But the ->retry_llist is updated while runing, and hence it could potentially race between normal task_work running and the task-has-exited shutdown path. Simply grab ->uring_lock while moving the local work to the fallback list for exit purposes, which nicely serializes it across both the normal additions and the exit prune path. 
Cc: stable@vger.kernel.org Fixes: f46b9cdb22f7 ("io_uring: limit local tw done") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 99cd5bcac201d..03e7b9d6b448c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1370,8 +1370,18 @@ void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags) static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx) { - struct llist_node *node = llist_del_all(&ctx->work_llist); + struct llist_node *node; + /* + * Running the work items may utilize ->retry_llist as a means + * for capping the number of task_work entries run at the same + * time. But that list can potentially race with moving the work + * from here, if the task is exiting. As any normal task_work + * running holds ->uring_lock already, just guard this slow path + * with ->uring_lock to avoid racing on ->retry_llist. + */ + guard(mutex)(&ctx->uring_lock); + node = llist_del_all(&ctx->work_llist); __io_fallback_tw(node, false); node = llist_del_all(&ctx->retry_llist); __io_fallback_tw(node, false); From d18099f19e53250f8ad2801498b88cec29d9107a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Apr 2026 12:42:45 -0700 Subject: [PATCH 530/554] bpf: Fix use-after-free in arena_vm_close on fork commit 4fddde2a732de60bb97e3307d4eb69ac5f1d2b74 upstream. arena_vm_open() only bumps vml->mmap_count but never registers the child VMA in arena->vma_list. The vml->vma always points at the parent VMA, so after parent munmap the pointer dangles. If the child then calls bpf_arena_free_pages(), zap_pages() reads the stale vml->vma triggering use-after-free. Fix this by preventing the arena VMA from being inherited across fork with VM_DONTCOPY, and preventing VMA splits via the may_split callback. Also reject mremap with a .mremap callback returning -EINVAL. 
A same-size mremap(MREMAP_FIXED) on the full arena VMA reaches copy_vma() through the following path: check_prep_vma() - returns 0 early: new_len == old_len skips VM_DONTEXPAND check prep_move_vma() - vm_start == old_addr and vm_end == old_addr + old_len so may_split is never called move_vma() copy_vma_and_data() copy_vma() vm_area_dup() - copies vm_private_data (vml pointer) vm_ops->open() - bumps vml->mmap_count vm_ops->mremap() - returns -EINVAL, rollback unmaps new VMA The refcount ensures the rollback's arena_vm_close does not free the vml shared with the original VMA. Reported-by: Weiming Shi Reported-by: Xiang Mei Fixes: 317460317a02 ("bpf: Introduce bpf_arena.") Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260413194245.21449-1-alexei.starovoitov@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arena.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 1074ac4459f2c..c67525847687f 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -246,6 +246,16 @@ static void arena_vm_open(struct vm_area_struct *vma) refcount_inc(&vml->mmap_count); } +static int arena_vm_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + return -EINVAL; +} + +static int arena_vm_mremap(struct vm_area_struct *vma) +{ + return -EINVAL; +} + static void arena_vm_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; @@ -307,6 +317,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf) static const struct vm_operations_struct arena_vm_ops = { .open = arena_vm_open, + .may_split = arena_vm_may_split, + .mremap = arena_vm_mremap, .close = arena_vm_close, .fault = arena_vm_fault, }; @@ -376,10 +388,11 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) arena->user_vm_end = vma->vm_end; /* * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and - * clears VM_MAYEXEC. 
Set VM_DONTEXPAND as well to avoid - * potential change of user_vm_start. + * clears VM_MAYEXEC. Set VM_DONTEXPAND to avoid potential change + * of user_vm_start. Set VM_DONTCOPY to prevent arena VMA from + * being copied into the child process on fork. */ - vm_flags_set(vma, VM_DONTEXPAND); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); vma->vm_ops = &arena_vm_ops; return 0; } From 1de2db19a6028abe7d905875922faef5b873de67 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 11 Apr 2026 14:36:36 -0700 Subject: [PATCH 531/554] mm/damon/core: disallow non-power of two min_region_sz on damon_start() commit 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a upstream. Commit d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") introduced a bug that allows unaligned DAMON region address ranges. Commit c80f46ac228b ("mm/damon/core: disallow non-power of two min_region_sz") fixed it, but only for damon_commit_ctx() use case. Still, DAMON sysfs interface can emit non-power of two min_region_sz via damon_start(). Fix the path by adding the is_power_of_2() check on damon_start(). The issue was discovered by sashiko [1]. 
Link: https://lore.kernel.org/20260411213638.77768-1-sj@kernel.org Link: https://lore.kernel.org/20260403155530.64647-1-sj@kernel.org [1] Fixes: d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") Signed-off-by: SeongJae Park Cc: # 6.18.x Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 7738095ea4ce3..c46236b73b2d6 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1352,6 +1352,11 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive) int i; int err = 0; + for (i = 0; i < nr_ctxs; i++) { + if (!is_power_of_2(ctxs[i]->min_sz_region)) + return -EINVAL; + } + mutex_lock(&damon_lock); if ((exclusive && nr_running_ctxs) || (!exclusive && running_exclusive_ctxs)) { From 25c2b77bc463f29ee71a54b883548baf9386a0db Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 May 2026 10:19:38 -0400 Subject: [PATCH 532/554] fbdev: defio: Disconnect deferred I/O from the lifetime of struct fb_info [ Upstream commit 9ded47ad003f09a94b6a710b5c47f4aa5ceb7429 ] Hold state of deferred I/O in struct fb_deferred_io_state. Allocate an instance as part of initializing deferred I/O and remove it only after the final mapping has been closed. If the fb_info and the contained deferred I/O meanwhile goes away, clear struct fb_deferred_io_state.info to invalidate the mapping. Any access will then result in a SIGBUS signal. Fixes a long-standing problem, where a device hot-unplug happens while user space still has an active mapping of the graphics memory. The hot- unplug frees the instance of struct fb_info. Accessing the memory will operate on undefined state. 
Signed-off-by: Thomas Zimmermann Fixes: 60b59beafba8 ("fbdev: mm: Deferred IO support") Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v2.6.22+ Signed-off-by: Helge Deller [ replaced kzalloc_obj(*fbdefio_state) with kzalloc(sizeof(*fbdefio_state), GFP_KERNEL) ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fb_defio.c | 178 ++++++++++++++++++++++------ include/linux/fb.h | 4 +- 2 files changed, 145 insertions(+), 37 deletions(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 8df2e51e33909..0b099a89a8234 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -24,6 +24,75 @@ #include #include +/* + * struct fb_deferred_io_state + */ + +struct fb_deferred_io_state { + struct kref ref; + + struct mutex lock; /* mutex that protects the pageref list */ + /* fields protected by lock */ + struct fb_info *info; +}; + +static struct fb_deferred_io_state *fb_deferred_io_state_alloc(void) +{ + struct fb_deferred_io_state *fbdefio_state; + + fbdefio_state = kzalloc(sizeof(*fbdefio_state), GFP_KERNEL); + if (!fbdefio_state) + return NULL; + + kref_init(&fbdefio_state->ref); + mutex_init(&fbdefio_state->lock); + + return fbdefio_state; +} + +static void fb_deferred_io_state_release(struct fb_deferred_io_state *fbdefio_state) +{ + mutex_destroy(&fbdefio_state->lock); + + kfree(fbdefio_state); +} + +static void fb_deferred_io_state_get(struct fb_deferred_io_state *fbdefio_state) +{ + kref_get(&fbdefio_state->ref); +} + +static void __fb_deferred_io_state_release(struct kref *ref) +{ + struct fb_deferred_io_state *fbdefio_state = + container_of(ref, struct fb_deferred_io_state, ref); + + fb_deferred_io_state_release(fbdefio_state); +} + +static void fb_deferred_io_state_put(struct fb_deferred_io_state *fbdefio_state) +{ + kref_put(&fbdefio_state->ref, __fb_deferred_io_state_release); +} + 
+/* + * struct vm_operations_struct + */ + +static void fb_deferred_io_vm_open(struct vm_area_struct *vma) +{ + struct fb_deferred_io_state *fbdefio_state = vma->vm_private_data; + + fb_deferred_io_state_get(fbdefio_state); +} + +static void fb_deferred_io_vm_close(struct vm_area_struct *vma) +{ + struct fb_deferred_io_state *fbdefio_state = vma->vm_private_data; + + fb_deferred_io_state_put(fbdefio_state); +} + static struct page *fb_deferred_io_get_page(struct fb_info *info, unsigned long offs) { struct fb_deferred_io *fbdefio = info->fbdefio; @@ -121,25 +190,46 @@ static void fb_deferred_io_pageref_put(struct fb_deferred_io_pageref *pageref, /* this is to find and return the vmalloc-ed fb pages */ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf) { + struct fb_info *info; unsigned long offset; struct page *page; - struct fb_info *info = vmf->vma->vm_private_data; + vm_fault_t ret; + struct fb_deferred_io_state *fbdefio_state = vmf->vma->vm_private_data; + + mutex_lock(&fbdefio_state->lock); + + info = fbdefio_state->info; + if (!info) { + ret = VM_FAULT_SIGBUS; /* our device is gone */ + goto err_mutex_unlock; + } offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= info->fix.smem_len) - return VM_FAULT_SIGBUS; + if (offset >= info->fix.smem_len) { + ret = VM_FAULT_SIGBUS; + goto err_mutex_unlock; + } page = fb_deferred_io_get_page(info, offset); - if (!page) - return VM_FAULT_SIGBUS; + if (!page) { + ret = VM_FAULT_SIGBUS; + goto err_mutex_unlock; + } if (!vmf->vma->vm_file) fb_err(info, "no mapping available\n"); BUG_ON(!info->fbdefio->mapping); + mutex_unlock(&fbdefio_state->lock); + vmf->page = page; + return 0; + +err_mutex_unlock: + mutex_unlock(&fbdefio_state->lock); + return ret; } int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -166,15 +256,24 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); * Adds a page to the dirty list. Call this from struct * vm_operations_struct.page_mkwrite. 
*/ -static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long offset, - struct page *page) +static vm_fault_t fb_deferred_io_track_page(struct fb_deferred_io_state *fbdefio_state, + unsigned long offset, struct page *page) { - struct fb_deferred_io *fbdefio = info->fbdefio; + struct fb_info *info; + struct fb_deferred_io *fbdefio; struct fb_deferred_io_pageref *pageref; vm_fault_t ret; /* protect against the workqueue changing the page list */ - mutex_lock(&fbdefio->lock); + mutex_lock(&fbdefio_state->lock); + + info = fbdefio_state->info; + if (!info) { + ret = VM_FAULT_SIGBUS; /* our device is gone */ + goto err_mutex_unlock; + } + + fbdefio = info->fbdefio; pageref = fb_deferred_io_pageref_get(info, offset, page); if (WARN_ON_ONCE(!pageref)) { @@ -192,50 +291,38 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long */ lock_page(pageref->page); - mutex_unlock(&fbdefio->lock); + mutex_unlock(&fbdefio_state->lock); /* come back after delay to process the deferred IO */ schedule_delayed_work(&info->deferred_work, fbdefio->delay); return VM_FAULT_LOCKED; err_mutex_unlock: - mutex_unlock(&fbdefio->lock); + mutex_unlock(&fbdefio_state->lock); return ret; } -/* - * fb_deferred_io_page_mkwrite - Mark a page as written for deferred I/O - * @fb_info: The fbdev info structure - * @vmf: The VM fault - * - * This is a callback we get when userspace first tries to - * write to the page. We schedule a workqueue. That workqueue - * will eventually mkclean the touched pages and execute the - * deferred framebuffer IO. Then if userspace touches a page - * again, we repeat the same scheme. - * - * Returns: - * VM_FAULT_LOCKED on success, or a VM_FAULT error otherwise. 
- */ -static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf) +static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_deferred_io_state *fbdefio_state, + struct vm_fault *vmf) { unsigned long offset = vmf->pgoff << PAGE_SHIFT; struct page *page = vmf->page; file_update_time(vmf->vma->vm_file); - return fb_deferred_io_track_page(info, offset, page); + return fb_deferred_io_track_page(fbdefio_state, offset, page); } -/* vm_ops->page_mkwrite handler */ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) { - struct fb_info *info = vmf->vma->vm_private_data; + struct fb_deferred_io_state *fbdefio_state = vmf->vma->vm_private_data; - return fb_deferred_io_page_mkwrite(info, vmf); + return fb_deferred_io_page_mkwrite(fbdefio_state, vmf); } static const struct vm_operations_struct fb_deferred_io_vm_ops = { + .open = fb_deferred_io_vm_open, + .close = fb_deferred_io_vm_close, .fault = fb_deferred_io_fault, .page_mkwrite = fb_deferred_io_mkwrite, }; @@ -252,7 +339,10 @@ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); if (!(info->flags & FBINFO_VIRTFB)) vm_flags_set(vma, VM_IO); - vma->vm_private_data = info; + vma->vm_private_data = info->fbdefio_state; + + fb_deferred_io_state_get(info->fbdefio_state); /* released in vma->vm_ops->close() */ + return 0; } EXPORT_SYMBOL_GPL(fb_deferred_io_mmap); @@ -263,9 +353,10 @@ static void fb_deferred_io_work(struct work_struct *work) struct fb_info *info = container_of(work, struct fb_info, deferred_work.work); struct fb_deferred_io_pageref *pageref, *next; struct fb_deferred_io *fbdefio = info->fbdefio; + struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state; /* here we wrprotect the page's mappings, then do all deferred IO. 
*/ - mutex_lock(&fbdefio->lock); + mutex_lock(&fbdefio_state->lock); #ifdef CONFIG_MMU list_for_each_entry(pageref, &fbdefio->pagereflist, list) { struct page *page = pageref->page; @@ -283,12 +374,13 @@ static void fb_deferred_io_work(struct work_struct *work) list_for_each_entry_safe(pageref, next, &fbdefio->pagereflist, list) fb_deferred_io_pageref_put(pageref, info); - mutex_unlock(&fbdefio->lock); + mutex_unlock(&fbdefio_state->lock); } int fb_deferred_io_init(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; + struct fb_deferred_io_state *fbdefio_state; struct fb_deferred_io_pageref *pagerefs; unsigned long npagerefs; int ret; @@ -298,7 +390,11 @@ int fb_deferred_io_init(struct fb_info *info) if (WARN_ON(!info->fix.smem_len)) return -EINVAL; - mutex_init(&fbdefio->lock); + fbdefio_state = fb_deferred_io_state_alloc(); + if (!fbdefio_state) + return -ENOMEM; + fbdefio_state->info = info; + INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work); INIT_LIST_HEAD(&fbdefio->pagereflist); if (fbdefio->delay == 0) /* set a default of 1 s */ @@ -315,10 +411,12 @@ int fb_deferred_io_init(struct fb_info *info) info->npagerefs = npagerefs; info->pagerefs = pagerefs; + info->fbdefio_state = fbdefio_state; + return 0; err: - mutex_destroy(&fbdefio->lock); + fb_deferred_io_state_release(fbdefio_state); return ret; } EXPORT_SYMBOL_GPL(fb_deferred_io_init); @@ -352,11 +450,19 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_release); void fb_deferred_io_cleanup(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; + struct fb_deferred_io_state *fbdefio_state = info->fbdefio_state; fb_deferred_io_lastclose(info); + info->fbdefio_state = NULL; + + mutex_lock(&fbdefio_state->lock); + fbdefio_state->info = NULL; + mutex_unlock(&fbdefio_state->lock); + + fb_deferred_io_state_put(fbdefio_state); + kvfree(info->pagerefs); - mutex_destroy(&fbdefio->lock); fbdefio->mapping = NULL; } EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup); diff --git 
a/include/linux/fb.h b/include/linux/fb.h index c3302d5135466..da2fdabd18cb3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -217,13 +217,14 @@ struct fb_deferred_io { unsigned long delay; bool sort_pagereflist; /* sort pagelist by offset */ int open_count; /* number of opened files; protected by fb_info lock */ - struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; + +struct fb_deferred_io_state; #endif /* @@ -490,6 +491,7 @@ struct fb_info { unsigned long npagerefs; struct fb_deferred_io_pageref *pagerefs; struct fb_deferred_io *fbdefio; + struct fb_deferred_io_state *fbdefio_state; #endif const struct fb_ops *fbops; From 609936df7ce11ef3398b4b3db89ffcf849cb8f93 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 4 May 2026 20:15:21 -0400 Subject: [PATCH 533/554] dma-mapping: add __dma_from_device_group_begin()/end() [ Upstream commit ca085faabb42c31ee204235facc5a430cb9e78a9 ] When a structure contains a buffer that DMA writes to alongside fields that the CPU writes to, cache line sharing between the DMA buffer and CPU-written fields can cause data corruption on non-cache-coherent platforms. Add __dma_from_device_group_begin()/end() annotations to ensure proper alignment to prevent this: struct my_device { spinlock_t lock1; __dma_from_device_group_begin(); char dma_buffer1[16]; char dma_buffer2[16]; __dma_from_device_group_end(); spinlock_t lock2; }; Message-ID: <19163086d5e4704c316f18f6da06bc1c72968904.1767601130.git.mst@redhat.com> Acked-by: Marek Szyprowski Reviewed-by: Petr Tesarik Signed-off-by: Michael S. 
Tsirkin Stable-dep-of: 3023c050af36 ("hwmon: (powerz) Avoid cacheline sharing for DMA buffer") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3e63046b899bc..f46a0848cb247 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -7,6 +7,7 @@ #include #include #include +#include /** * List of possible attributes associated with a DMA mapping. The semantics @@ -710,6 +711,18 @@ static inline int dma_get_cache_alignment(void) } #endif +#ifdef ARCH_HAS_DMA_MINALIGN +#define ____dma_from_device_aligned __aligned(ARCH_DMA_MINALIGN) +#else +#define ____dma_from_device_aligned +#endif +/* Mark start of DMA buffer */ +#define __dma_from_device_group_begin(GROUP) \ + __cacheline_group_begin(GROUP) ____dma_from_device_aligned +/* Mark end of DMA buffer */ +#define __dma_from_device_group_end(GROUP) \ + __cacheline_group_end(GROUP) ____dma_from_device_aligned + static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { From 1869da3efe703b016b23d4885f3fe6c1751959c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 4 May 2026 20:15:22 -0400 Subject: [PATCH 534/554] hwmon: (powerz) Avoid cacheline sharing for DMA buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3023c050af3600bf451153335dea5e073c9a3088 ] Depending on the architecture the transfer buffer may share a cacheline with the following mutex. As the buffer may be used for DMA, that is problematic. Use the high-level DMA helpers to make sure that cacheline sharing can not happen. Also drop the comment, as the helpers are documentation enough. 
Closes: https://lore.kernel.org/20260408175814.934BFC19421%40smtp.kernel.org Fixes: 4381a36abdf1c ("hwmon: add POWER-Z driver") Cc: stable@vger.kernel.org # ca085faabb42: dma-mapping: add __dma_from_device_group_begin()/end() Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20260408-powerz-cacheline-alias-v1-1-1254891be0dd@weissschuh.net Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/powerz.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/powerz.c b/drivers/hwmon/powerz.c index 96438f5f05d48..6e1359144cabe 100644 --- a/drivers/hwmon/powerz.c +++ b/drivers/hwmon/powerz.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,9 @@ struct powerz_sensor_data { } __packed; struct powerz_priv { - char transfer_buffer[64]; /* first member to satisfy DMA alignment */ + __dma_from_device_group_begin(); + char transfer_buffer[64]; + __dma_from_device_group_end(); struct mutex mutex; struct completion completion; struct urb *urb; From b0f4711b426a06fb4c4be85c36b9f5588d5140d3 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 5 May 2026 05:49:31 -0400 Subject: [PATCH 535/554] octeon_ep_vf: add NULL check for napi_build_skb() [ Upstream commit dd66b42854705e4e4ee7f14d260f86c578bed3e3 ] napi_build_skb() can return NULL on allocation failure. In __octep_vf_oq_process_rx(), the result is used directly without a NULL check in both the single-buffer and multi-fragment paths, leading to a NULL pointer dereference. Add NULL checks after both napi_build_skb() calls, properly advancing descriptors and consuming remaining fragments on failure. 
Fixes: 1cd3b407977c ("octeon_ep_vf: add Tx/Rx processing and interrupt support") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260409184009.930359-3-devnexen@gmail.com Signed-off-by: Jakub Kicinski [ inlined missing octep_vf_oq_next_idx() helper as read_idx++ with wraparound ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../marvell/octeon_ep_vf/octep_vf_rx.c | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index b579d5b545c46..8347e696937cd 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -409,10 +409,17 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, data_offset = OCTEP_VF_OQ_RESP_HW_SIZE; rx_ol_flags = 0; } - rx_bytes += buff_info->len; - if (buff_info->len <= oq->max_single_buffer_size) { skb = napi_build_skb((void *)resp_hw, PAGE_SIZE); + if (!skb) { + oq->stats->alloc_failures++; + desc_used++; + read_idx++; + if (read_idx == oq->max_count) + read_idx = 0; + continue; + } + rx_bytes += buff_info->len; skb_reserve(skb, data_offset); skb_put(skb, buff_info->len); read_idx++; @@ -424,6 +431,31 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, u16 data_len; skb = napi_build_skb((void *)resp_hw, PAGE_SIZE); + if (!skb) { + oq->stats->alloc_failures++; + desc_used++; + read_idx++; + if (read_idx == oq->max_count) + read_idx = 0; + data_len = buff_info->len - oq->max_single_buffer_size; + while (data_len) { + dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + buff_info = (struct octep_vf_rx_buffer *) + &oq->buff_info[read_idx]; + buff_info->page = NULL; + if (data_len < oq->buffer_size) + data_len = 0; + else + data_len -= oq->buffer_size; + desc_used++; + read_idx++; + if (read_idx == 
oq->max_count) + read_idx = 0; + } + continue; + } + rx_bytes += buff_info->len; skb_reserve(skb, data_offset); /* Head fragment includes response header(s); * subsequent fragments contains only data. From ebad723082715f6517f3591750c18b91bf45e1c0 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Tue, 5 May 2026 06:17:29 -0400 Subject: [PATCH 536/554] mmc: core: Adjust MDT beyond 2025 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e487a634bc019166e452ea276f7522710eda9f4 ] JEDEC JESD84-B51B which was released in September 2025, increases the manufacturing year limit for eMMC devices. The eMMC manufacturing year is stored in a 4-bit field in the CID register. Originally, it covered 1997–2012. Later, with EXT_CSD_REV=8, it was extended up to 2025. Now, with EXT_CSD_REV=9, the range is rolled over by another 16 years, up to 2038. The mapping is as follows: cid[8..11] | rev ≤ 4 | 8 ≥ rev > 4 | rev > 8 --------------------------------------------- 0 | 1997 | 2013 | 2029 1 | 1998 | 2014 | 2030 2 | 1999 | 2015 | 2031 3 | 2000 | 2016 | 2032 4 | 2001 | 2017 | 2033 5 | 2002 | 2018 | 2034 6 | 2003 | 2019 | 2035 7 | 2004 | 2020 | 2036 8 | 2005 | 2021 | 2037 9 | 2006 | 2022 | 2038 10 | 2007 | 2023 | 11 | 2008 | 2024 | 12 | 2009 | 2025 | 13 | 2010 | | 2026 14 | 2011 | | 2027 15 | 2012 | | 2028 Signed-off-by: Avri Altman Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson Stable-dep-of: d6bf2e64dec8 ("mmc: core: Optimize time for secure erase/trim for some Kingston eMMCs") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3e7d9437477c7..243da805ada67 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -671,7 +671,14 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->ext_csd.enhanced_rpmb_supported = (card->ext_csd.rel_param & 
EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR); + + if (card->ext_csd.rev >= 9) { + /* Adjust production date as per JEDEC JESD84-B51B September 2025 */ + if (card->cid.year < 2023) + card->cid.year += 16; + } } + out: return err; } From f597ab044bea75601ce00f694d3e12d1476c2d68 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Tue, 5 May 2026 06:17:30 -0400 Subject: [PATCH 537/554] mmc: core: Add quirk for incorrect manufacturing date [ Upstream commit 263ff314cc5602599d481b0912a381555fcbad28 ] Some eMMC vendors need to report manufacturing dates beyond 2025 but are reluctant to update the EXT_CSD revision from 8 to 9. Updating the EXT_CSD revision may involve additional testing or qualification steps with customers. To ease this transition and avoid a full re-qualification process, a workaround is needed. This patch introduces a temporary quirk that re-purposes the year codes corresponding to 2010, 2011, and 2012 to represent the years 2026, 2027, and 2028, respectively. This solution is only valid for this three-year period. After 2028, vendors must update their firmware to set EXT_CSD_REV=9 to continue reporting the correct manufacturing date in compliance with the JEDEC standard. The `MMC_QUIRK_BROKEN_MDT` is introduced and enabled for all Sandisk devices to handle this behavior. 
Signed-off-by: Avri Altman Signed-off-by: Ulf Hansson Stable-dep-of: d6bf2e64dec8 ("mmc: core: Optimize time for secure erase/trim for some Kingston eMMCs") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/card.h | 6 ++++++ drivers/mmc/core/mmc.c | 5 +++++ drivers/mmc/core/quirks.h | 3 +++ include/linux/mmc/card.h | 1 + 4 files changed, 15 insertions(+) diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 1200951bab08c..a9619dd452708 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -89,6 +89,7 @@ struct mmc_fixup { #define CID_MANFID_MICRON 0x13 #define CID_MANFID_SAMSUNG 0x15 #define CID_MANFID_APACER 0x27 +#define CID_MANFID_SANDISK_MMC 0x45 #define CID_MANFID_SWISSBIT 0x5D #define CID_MANFID_KINGSTON 0x70 #define CID_MANFID_HYNIX 0x90 @@ -305,4 +306,9 @@ static inline int mmc_card_no_uhs_ddr50_tuning(const struct mmc_card *c) return c->quirks & MMC_QUIRK_NO_UHS_DDR50_TUNING; } +static inline int mmc_card_broken_mdt(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_MDT; +} + #endif diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 243da805ada67..49f568f9a7d3e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -676,6 +676,11 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) /* Adjust production date as per JEDEC JESD84-B51B September 2025 */ if (card->cid.year < 2023) card->cid.year += 16; + } else { + /* Handle vendors with broken MDT reporting */ + if (mmc_card_broken_mdt(card) && card->cid.year >= 2010 && + card->cid.year <= 2012) + card->cid.year += 16; } } diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index c417ed34c0576..f5e8a0f6d11b9 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -170,6 +170,9 @@ static const struct mmc_fixup __maybe_unused mmc_ext_csd_fixups[] = { MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_NUMONYX, 0x014e, add_quirk, MMC_QUIRK_BROKEN_HPI, 6), + 
MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_MMC, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BROKEN_MDT), + END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index ddcdf23d731c4..fd0cb188c46e2 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -330,6 +330,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ #define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ +#define MMC_QUIRK_BROKEN_MDT (1<<19) /* Wrong manufacturing year */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ From afece4e3f2cd57bb4eef8c80ea02eeaa900063e2 Mon Sep 17 00:00:00 2001 From: Luke Wang Date: Tue, 5 May 2026 06:17:31 -0400 Subject: [PATCH 538/554] mmc: core: Optimize time for secure erase/trim for some Kingston eMMCs [ Upstream commit d6bf2e64dec87322f2b11565ddb59c0e967f96e3 ] Kingston eMMC IY2964 and IB2932 takes a fixed ~2 seconds for each secure erase/trim operation regardless of size - that is, a single secure erase/trim operation of 1MB takes the same time as 1GB. With default calculated 3.5MB max discard size, secure erase 1GB requires ~300 separate operations taking ~10 minutes total. Add a card quirk, MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME, to set maximum secure erase size for those devices. This allows 1GB secure erase to complete in a single operation, reducing time from 10 minutes to just 2 seconds. 
Signed-off-by: Luke Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/card.h | 5 +++++ drivers/mmc/core/queue.c | 9 +++++++-- drivers/mmc/core/quirks.h | 9 +++++++++ include/linux/mmc/card.h | 1 + 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index a9619dd452708..a7c364d0030ad 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -311,4 +311,9 @@ static inline int mmc_card_broken_mdt(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_MDT; } +static inline int mmc_card_fixed_secure_erase_trim_time(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME; +} + #endif diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 284856c8f6559..eb1053d8cae72 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -184,8 +184,13 @@ static void mmc_queue_setup_discard(struct mmc_card *card, return; lim->max_hw_discard_sectors = max_discard; - if (mmc_card_can_secure_erase_trim(card)) - lim->max_secure_erase_sectors = max_discard; + if (mmc_card_can_secure_erase_trim(card)) { + if (mmc_card_fixed_secure_erase_trim_time(card)) + lim->max_secure_erase_sectors = UINT_MAX >> card->erase_shift; + else + lim->max_secure_erase_sectors = max_discard; + } + if (mmc_card_can_trim(card) && card->erased_byte == 0) lim->max_write_zeroes_sectors = max_discard; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index f5e8a0f6d11b9..6f727b4a60a52 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -153,6 +153,15 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), + /* + * On Some Kingston eMMCs, secure erase/trim time is independent + * of erase size, fixed at approximately 2 seconds. 
+ */ + MMC_FIXUP("IY2964", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, + MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME), + MMC_FIXUP("IB2932", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, + MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME), + END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index fd0cb188c46e2..8151f2240db41 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -331,6 +331,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ #define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ #define MMC_QUIRK_BROKEN_MDT (1<<19) /* Wrong manufacturing year */ +#define MMC_QUIRK_FIXED_SECURE_ERASE_TRIM_TIME (1<<20) /* Secure erase/trim time is fixed regardless of size */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ From fb6616806f3dec598eb4178faea6d506db3317c5 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Tue, 5 May 2026 06:33:29 -0400 Subject: [PATCH 539/554] crypto: qat - fix indentation of macros in qat_hal.c [ Upstream commit 4963b39e3a3feed07fbf4d5cc2b5df8498888285 ] The macros in qat_hal.c were using a mixture of tabs and spaces. Update all macro indentation to use tabs consistently, matching the predominant style. This does not introduce any functional change. 
Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Stable-dep-of: e7dcb722bb75 ("crypto: qat - fix firmware loading failure for GEN6 devices") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/intel/qat/qat_common/qat_hal.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/qat_hal.c b/drivers/crypto/intel/qat/qat_common/qat_hal.c index da4eca6e1633e..614400e7bb0ac 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_hal.c +++ b/drivers/crypto/intel/qat/qat_common/qat_hal.c @@ -9,17 +9,17 @@ #include "icp_qat_hal.h" #include "icp_qat_uclo.h" -#define BAD_REGADDR 0xffff -#define MAX_RETRY_TIMES 10000 -#define INIT_CTX_ARB_VALUE 0x0 -#define INIT_CTX_ENABLE_VALUE 0x0 -#define INIT_PC_VALUE 0x0 -#define INIT_WAKEUP_EVENTS_VALUE 0x1 -#define INIT_SIG_EVENTS_VALUE 0x1 -#define INIT_CCENABLE_VALUE 0x2000 -#define RST_CSR_QAT_LSB 20 -#define RST_CSR_AE_LSB 0 -#define MC_TIMESTAMP_ENABLE (0x1 << 7) +#define BAD_REGADDR 0xffff +#define MAX_RETRY_TIMES 10000 +#define INIT_CTX_ARB_VALUE 0x0 +#define INIT_CTX_ENABLE_VALUE 0x0 +#define INIT_PC_VALUE 0x0 +#define INIT_WAKEUP_EVENTS_VALUE 0x1 +#define INIT_SIG_EVENTS_VALUE 0x1 +#define INIT_CCENABLE_VALUE 0x2000 +#define RST_CSR_QAT_LSB 20 +#define RST_CSR_AE_LSB 0 +#define MC_TIMESTAMP_ENABLE (0x1 << 7) #define IGNORE_W1C_MASK ((~(1 << CE_BREAKPOINT_BITPOS)) & \ (~(1 << CE_CNTL_STORE_PARITY_ERROR_BITPOS)) & \ From 4d7b42941c9be046a65a994012c9aafcb9b48d32 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Tue, 5 May 2026 06:33:30 -0400 Subject: [PATCH 540/554] crypto: qat - fix firmware loading failure for GEN6 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7dcb722bb75bb3f3992f580a8728a794732fd7a ] QAT GEN6 hardware requires a minimum 3 us delay during the acceleration engine reset sequence to ensure the 
hardware fully settles. Without this delay, the firmware load may fail intermittently. Add a delay after placing the AE into reset and before clearing the reset, matching the hardware requirements and ensuring stable firmware loading. Earlier generations remain unaffected. Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver") Signed-off-by: Suman Kumar Chakraborty Cc: stable@vger.kernel.org Reviewed-by: Giovanni Cabiddu Reviewed-by: Andy Shevchenko Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/intel/qat/qat_common/adf_accel_engine.c | 7 +++++++ .../crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h | 1 + drivers/crypto/intel/qat/qat_common/qat_hal.c | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c b/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c index 4b5d0350fc2ef..4abbf8ade6270 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2014 - 2020 Intel Corporation */ +#include #include #include #include "adf_cfg.h" @@ -162,8 +163,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev) static int adf_ae_reset(struct adf_accel_dev *accel_dev, int ae) { struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; + unsigned long reset_delay; qat_hal_reset(loader_data->fw_loader); + + reset_delay = loader_data->fw_loader->chip_info->reset_delay_us; + if (reset_delay) + fsleep(reset_delay); + if (qat_hal_clr_reset(loader_data->fw_loader)) return -EFAULT; diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h index 6887930c7995e..e74cafa95f1cc 100644 --- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h +++ 
b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_loader_handle.h @@ -27,6 +27,7 @@ struct icp_qat_fw_loader_chip_info { int mmp_sram_size; bool nn; bool lm2lm3; + u16 reset_delay_us; u32 lm_size; u32 icp_rst_csr; u32 icp_rst_mask; diff --git a/drivers/crypto/intel/qat/qat_common/qat_hal.c b/drivers/crypto/intel/qat/qat_common/qat_hal.c index 614400e7bb0ac..b4b75da68c200 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_hal.c +++ b/drivers/crypto/intel/qat/qat_common/qat_hal.c @@ -20,6 +20,7 @@ #define RST_CSR_QAT_LSB 20 #define RST_CSR_AE_LSB 0 #define MC_TIMESTAMP_ENABLE (0x1 << 7) +#define MIN_RESET_DELAY_US 3 #define IGNORE_W1C_MASK ((~(1 << CE_BREAKPOINT_BITPOS)) & \ (~(1 << CE_CNTL_STORE_PARITY_ERROR_BITPOS)) & \ @@ -713,8 +714,10 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, handle->chip_info->wakeup_event_val = 0x80000000; handle->chip_info->fw_auth = true; handle->chip_info->css_3k = true; - if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_6XXX) + if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_6XXX) { handle->chip_info->dual_sign = true; + handle->chip_info->reset_delay_us = MIN_RESET_DELAY_US; + } handle->chip_info->tgroup_share_ustore = true; handle->chip_info->fcu_ctl_csr = FCU_CONTROL_4XXX; handle->chip_info->fcu_sts_csr = FCU_STATUS_4XXX; From bbcef208c5da076d8316a07b44c5ecd3a6e23eff Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 8 May 2026 17:17:56 -0400 Subject: [PATCH 541/554] mm, swap: speed up hibernation allocation and writeout [ Upstream commit 396f57b5720024638dbb503f6a4abd988a49d815 ] Since commit 0ff67f990bd4 ("mm, swap: remove swap slot cache"), hibernation has been using the swap slot slow allocation path for simplification, which turns out might cause regression for some devices because the allocator now rotates clusters too often, leading to slower allocation and more random distribution of data. Fast allocation is not complex, so implement hibernation support as well. 
Test result with Samsung SSD 830 Series (SATA II, 3.0 Gbps) shows the performance is several times better [1]: 6.19: 324 seconds After this series: 35 seconds Link: https://lkml.kernel.org/r/20260216-hibernate-perf-v4-1-1ba9f0bf1ec9@tencent.com Link: https://lore.kernel.org/linux-mm/8b4bdcfa-ce3f-4e23-839f-31367df7c18f@gmx.de/ [1] Signed-off-by: Kairui Song Fixes: 0ff67f990bd4 ("mm, swap: remove swap slot cache") Reported-by: Carsten Grohmann Closes: https://lore.kernel.org/linux-mm/20260206121151.dea3633d1f0ded7bbf49c22e@linux-foundation.org/ Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Kemeng Shi Cc: Nhat Pham Cc: Signed-off-by: Andrew Morton [ adjusted helper signatures ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/swapfile.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 89746abc47373..ca0298a840cdd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2014,8 +2014,9 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr) swp_entry_t get_swap_page_of_type(int type) { - struct swap_info_struct *si = swap_type_to_info(type); - unsigned long offset; + struct swap_info_struct *pcp_si, *si = swap_type_to_info(type); + unsigned long pcp_offset, offset = SWAP_ENTRY_INVALID; + struct swap_cluster_info *ci; swp_entry_t entry = {0}; if (!si) @@ -2025,11 +2026,21 @@ swp_entry_t get_swap_page_of_type(int type) if (get_swap_device_info(si)) { if (si->flags & SWP_WRITEOK) { /* - * Grab the local lock to be complaint - * with swap table allocation. + * Try the local cluster first if it matches the device. If + * not, try grab a new cluster and override local cluster. 
*/ local_lock(&percpu_swap_cluster.lock); - offset = cluster_alloc_swap_entry(si, 0, 1); + pcp_si = this_cpu_read(percpu_swap_cluster.si[0]); + pcp_offset = this_cpu_read(percpu_swap_cluster.offset[0]); + if (pcp_si == si && pcp_offset) { + ci = swap_cluster_lock(si, pcp_offset); + if (cluster_is_usable(ci, 0)) + offset = alloc_swap_scan_cluster(si, ci, pcp_offset, 0, 1); + else + swap_cluster_unlock(ci); + } + if (!offset) + offset = cluster_alloc_swap_entry(si, 0, 1); local_unlock(&percpu_swap_cluster.lock); if (offset) entry = swp_entry(si->type, offset); From 23b814ec96a86f6687dc6ffa5d105bc86e7c8505 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 8 May 2026 10:52:59 -0400 Subject: [PATCH 542/554] firmware: exynos-acpm: Drop fake 'const' on handle pointer [ Upstream commit a2be37eedb52ea26938fa4cc9de1ff84963c57ad ] All the functions operating on the 'handle' pointer are claiming it is a pointer to const thus they should not modify the handle. In fact that's a false statement, because first thing these functions do is drop the cast to const with container_of: struct acpm_info *acpm = handle_to_acpm_info(handle); And with such cast the handle is easily writable with simple: acpm->handle.ops.pmic_ops.read_reg = NULL; The code is not correct logically, either, because functions like acpm_get_by_node() and acpm_handle_put() are meant to modify the handle reference counting, thus they must modify the handle. Modification here happens anyway, even if the reference counting is stored in the container which the handle is part of. The code does not have actual visible bug, but incorrect 'const' annotations could lead to incorrect compiler decisions. 
Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260224104203.42950-2-krzysztof.kozlowski@oss.qualcomm.com Signed-off-by: Krzysztof Kozlowski [ dropped hunks for DVFS/clk-acpm files and `acpm_dvfs_ops` struct that don't exist in 6.18 ] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/samsung/exynos-acpm-pmic.c | 10 +++---- drivers/firmware/samsung/exynos-acpm-pmic.h | 10 +++---- drivers/firmware/samsung/exynos-acpm.c | 16 +++++----- drivers/firmware/samsung/exynos-acpm.h | 2 +- drivers/mfd/sec-acpm.c | 10 +++---- .../firmware/samsung/exynos-acpm-protocol.h | 29 ++++++++----------- 6 files changed, 37 insertions(+), 40 deletions(-) diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.c b/drivers/firmware/samsung/exynos-acpm-pmic.c index 961d7599e4224..52e89d1b790f0 100644 --- a/drivers/firmware/samsung/exynos-acpm-pmic.c +++ b/drivers/firmware/samsung/exynos-acpm-pmic.c @@ -77,7 +77,7 @@ static void acpm_pmic_init_read_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan) cmd[3] = ktime_to_ms(ktime_get()); } -int acpm_pmic_read_reg(const struct acpm_handle *handle, +int acpm_pmic_read_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 *buf) { @@ -107,7 +107,7 @@ static void acpm_pmic_init_bulk_read_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan, FIELD_PREP(ACPM_PMIC_VALUE, count); } -int acpm_pmic_bulk_read(const struct acpm_handle *handle, +int acpm_pmic_bulk_read(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 count, u8 *buf) { @@ -150,7 +150,7 @@ static void acpm_pmic_init_write_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan, cmd[3] = ktime_to_ms(ktime_get()); } -int acpm_pmic_write_reg(const struct acpm_handle *handle, +int acpm_pmic_write_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 value) { @@ -187,7 +187,7 @@ static void 
acpm_pmic_init_bulk_write_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan, } } -int acpm_pmic_bulk_write(const struct acpm_handle *handle, +int acpm_pmic_bulk_write(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 count, const u8 *buf) { @@ -220,7 +220,7 @@ static void acpm_pmic_init_update_cmd(u32 cmd[4], u8 type, u8 reg, u8 chan, cmd[3] = ktime_to_ms(ktime_get()); } -int acpm_pmic_update_reg(const struct acpm_handle *handle, +int acpm_pmic_update_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 value, u8 mask) { diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.h b/drivers/firmware/samsung/exynos-acpm-pmic.h index 078421888a140..88ae9aada2aea 100644 --- a/drivers/firmware/samsung/exynos-acpm-pmic.h +++ b/drivers/firmware/samsung/exynos-acpm-pmic.h @@ -11,19 +11,19 @@ struct acpm_handle; -int acpm_pmic_read_reg(const struct acpm_handle *handle, +int acpm_pmic_read_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 *buf); -int acpm_pmic_bulk_read(const struct acpm_handle *handle, +int acpm_pmic_bulk_read(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 count, u8 *buf); -int acpm_pmic_write_reg(const struct acpm_handle *handle, +int acpm_pmic_write_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 value); -int acpm_pmic_bulk_write(const struct acpm_handle *handle, +int acpm_pmic_bulk_write(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 count, const u8 *buf); -int acpm_pmic_update_reg(const struct acpm_handle *handle, +int acpm_pmic_update_reg(struct acpm_handle *handle, unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, u8 value, u8 mask); #endif /* __EXYNOS_ACPM_PMIC_H__ */ diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index 3a69fe3234c75..6572cd7be9d17 100644 --- 
a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -409,7 +409,7 @@ static int acpm_wait_for_message_response(struct acpm_chan *achan, * * Return: 0 on success, -errno otherwise. */ -int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) +int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer) { struct acpm_info *acpm = handle_to_acpm_info(handle); struct exynos_mbox_msg msg; @@ -649,7 +649,7 @@ static int acpm_probe(struct platform_device *pdev) * acpm_handle_put() - release the handle acquired by acpm_get_by_phandle. * @handle: Handle acquired by acpm_get_by_phandle. */ -static void acpm_handle_put(const struct acpm_handle *handle) +static void acpm_handle_put(struct acpm_handle *handle) { struct acpm_info *acpm = handle_to_acpm_info(handle); struct device *dev = acpm->dev; @@ -675,9 +675,11 @@ static void devm_acpm_release(struct device *dev, void *res) * @np: ACPM device tree node. * * Return: pointer to handle on success, ERR_PTR(-errno) otherwise. + * + * Note: handle CANNOT be pointer to const */ -static const struct acpm_handle *acpm_get_by_node(struct device *dev, - struct device_node *np) +static struct acpm_handle *acpm_get_by_node(struct device *dev, + struct device_node *np) { struct platform_device *pdev; struct device_link *link; @@ -718,10 +720,10 @@ static const struct acpm_handle *acpm_get_by_node(struct device *dev, * * Return: pointer to handle on success, ERR_PTR(-errno) otherwise. 
*/ -const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, - struct device_node *np) +struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np) { - const struct acpm_handle **ptr, *handle; + struct acpm_handle **ptr, *handle; ptr = devres_alloc(devm_acpm_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) diff --git a/drivers/firmware/samsung/exynos-acpm.h b/drivers/firmware/samsung/exynos-acpm.h index 2d14cb58f98c9..6417550f89aa9 100644 --- a/drivers/firmware/samsung/exynos-acpm.h +++ b/drivers/firmware/samsung/exynos-acpm.h @@ -17,7 +17,7 @@ struct acpm_xfer { struct acpm_handle; -int acpm_do_xfer(const struct acpm_handle *handle, +int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer); #endif /* __EXYNOS_ACPM_H__ */ diff --git a/drivers/mfd/sec-acpm.c b/drivers/mfd/sec-acpm.c index 8b31c816d65b8..9e529536a210b 100644 --- a/drivers/mfd/sec-acpm.c +++ b/drivers/mfd/sec-acpm.c @@ -217,7 +217,7 @@ static const struct regmap_config s2mpg10_regmap_config_meter = { }; struct sec_pmic_acpm_shared_bus_context { - const struct acpm_handle *acpm; + struct acpm_handle *acpm; unsigned int acpm_chan_id; u8 speedy_channel; }; @@ -240,7 +240,7 @@ static int sec_pmic_acpm_bus_write(void *context, const void *data, size_t count) { struct sec_pmic_acpm_bus_context *ctx = context; - const struct acpm_handle *acpm = ctx->shared->acpm; + struct acpm_handle *acpm = ctx->shared->acpm; const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops; size_t val_count = count - BITS_TO_BYTES(ACPM_ADDR_BITS); const u8 *d = data; @@ -260,7 +260,7 @@ static int sec_pmic_acpm_bus_read(void *context, const void *reg_buf, size_t reg void *val_buf, size_t val_size) { struct sec_pmic_acpm_bus_context *ctx = context; - const struct acpm_handle *acpm = ctx->shared->acpm; + struct acpm_handle *acpm = ctx->shared->acpm; const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops; const u8 *r = reg_buf; u8 reg; @@ -279,7 +279,7 @@ static int 
sec_pmic_acpm_bus_reg_update_bits(void *context, unsigned int reg, un unsigned int val) { struct sec_pmic_acpm_bus_context *ctx = context; - const struct acpm_handle *acpm = ctx->shared->acpm; + struct acpm_handle *acpm = ctx->shared->acpm; const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops; return pmic_ops->update_reg(acpm, ctx->shared->acpm_chan_id, ctx->type, reg & 0xff, @@ -335,7 +335,7 @@ static int sec_pmic_acpm_probe(struct platform_device *pdev) struct regmap *regmap_common, *regmap_pmic, *regmap; const struct sec_pmic_acpm_platform_data *pdata; struct sec_pmic_acpm_shared_bus_context *shared_ctx; - const struct acpm_handle *acpm; + struct acpm_handle *acpm; struct device *dev = &pdev->dev; int ret, irq; diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h index f628bf1862c25..c0d796c5a2b89 100644 --- a/include/linux/firmware/samsung/exynos-acpm-protocol.h +++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h @@ -14,21 +14,16 @@ struct acpm_handle; struct device_node; struct acpm_pmic_ops { - int (*read_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 *buf); - int (*bulk_read)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 count, u8 *buf); - int (*write_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 value); - int (*bulk_write)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 count, const u8 *buf); - int (*update_reg)(const struct acpm_handle *handle, - unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, - u8 value, u8 mask); + int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 *buf); + int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 count, u8 *buf); + int (*write_reg)(struct 
acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 value); + int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 count, const u8 *buf); + int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, + u8 type, u8 reg, u8 chan, u8 value, u8 mask); }; struct acpm_ops { @@ -45,7 +40,7 @@ struct acpm_handle { struct device; -const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, - struct device_node *np); +struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np); #endif /* __EXYNOS_ACPM_PROTOCOL_H */ From a420904450962a562ad053a41a53a27755021b48 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Fri, 8 May 2026 20:35:33 -0400 Subject: [PATCH 543/554] hfsplus: fix uninit-value by validating catalog record size [ Upstream commit b6b592275aeff184aa82fcf6abccd833fb71b393 ] Syzbot reported a KMSAN uninit-value issue in hfsplus_strcasecmp(). The root cause is that hfs_brec_read() doesn't validate that the on-disk record size matches the expected size for the record type being read. When mounting a corrupted filesystem, hfs_brec_read() may read less data than expected. For example, when reading a catalog thread record, the debug output showed: HFSPLUS_BREC_READ: rec_len=520, fd->entrylength=26 HFSPLUS_BREC_READ: WARNING - entrylength (26) < rec_len (520) - PARTIAL READ! hfs_brec_read() only validates that entrylength is not greater than the buffer size, but doesn't check if it's less than expected. It successfully reads 26 bytes into a 520-byte structure and returns success, leaving 494 bytes uninitialized. This uninitialized data in tmp.thread.nodeName then gets copied by hfsplus_cat_build_key_uni() and used by hfsplus_strcasecmp(), triggering the KMSAN warning when the uninitialized bytes are used as array indices in case_fold(). Fix by introducing hfsplus_brec_read_cat() wrapper that: 1. Calls hfs_brec_read() to read the data 2. 
Validates the record size based on the type field: - Fixed size for folder and file records - Variable size for thread records (depends on string length) 3. Returns -EIO if size doesn't match expected For thread records, check against HFSPLUS_MIN_THREAD_SZ before reading nodeName.length to avoid reading uninitialized data at call sites that don't zero-initialize the entry structure. Also initialize the tmp variable in hfsplus_find_cat() as defensive programming to ensure no uninitialized data even if validation is bypassed. Reported-by: syzbot+d80abb5b890d39261e72@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d80abb5b890d39261e72 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: syzbot+d80abb5b890d39261e72@syzkaller.appspotmail.com Reviewed-by: Viacheslav Dubeyko Tested-by: Viacheslav Dubeyko Suggested-by: Charalampos Mitrodimas Link: https://lore.kernel.org/all/20260120051114.1281285-1-kartikey406@gmail.com/ [v1] Link: https://lore.kernel.org/all/20260121063109.1830263-1-kartikey406@gmail.com/ [v2] Link: https://lore.kernel.org/all/20260212014233.2422046-1-kartikey406@gmail.com/ [v3] Link: https://lore.kernel.org/all/20260214002100.436125-1-kartikey406@gmail.com/T/ [v4] Link: https://lore.kernel.org/all/20260221061626.15853-1-kartikey406@gmail.com/T/ [v5] Signed-off-by: Deepanshu Kartikey Signed-off-by: Viacheslav Dubeyko Link: https://lore.kernel.org/r/20260307010302.41547-1-kartikey406@gmail.com Signed-off-by: Viacheslav Dubeyko Stable-dep-of: 90c500e4fd83 ("hfsplus: fix held lock freed on hfsplus_fill_super()") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/bfind.c | 51 +++++++++++++++++++++++++++++++++++++++++ fs/hfsplus/catalog.c | 4 ++-- fs/hfsplus/dir.c | 2 +- fs/hfsplus/hfsplus_fs.h | 9 ++++++++ fs/hfsplus/super.c | 2 +- 5 files changed, 64 insertions(+), 4 deletions(-) diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index afc9c89e8c6af..9e3c19138ae47 100644 --- a/fs/hfsplus/bfind.c +++ 
b/fs/hfsplus/bfind.c @@ -287,3 +287,54 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) fd->bnode = bnode; return res; } + +/** + * hfsplus_brec_read_cat - read and validate a catalog record + * @fd: find data structure + * @entry: pointer to catalog entry to read into + * + * Reads a catalog record and validates its size matches the expected + * size based on the record type. + * + * Returns 0 on success, or negative error code on failure. + */ +int hfsplus_brec_read_cat(struct hfs_find_data *fd, hfsplus_cat_entry *entry) +{ + int res; + u32 expected_size; + + res = hfs_brec_read(fd, entry, sizeof(hfsplus_cat_entry)); + if (res) + return res; + + /* Validate catalog record size based on type */ + switch (be16_to_cpu(entry->type)) { + case HFSPLUS_FOLDER: + expected_size = sizeof(struct hfsplus_cat_folder); + break; + case HFSPLUS_FILE: + expected_size = sizeof(struct hfsplus_cat_file); + break; + case HFSPLUS_FOLDER_THREAD: + case HFSPLUS_FILE_THREAD: + /* Ensure we have at least the fixed fields before reading nodeName.length */ + if (fd->entrylength < HFSPLUS_MIN_THREAD_SZ) { + pr_err("thread record too short (got %u)\n", fd->entrylength); + return -EIO; + } + expected_size = hfsplus_cat_thread_size(&entry->thread); + break; + default: + pr_err("unknown catalog record type %d\n", + be16_to_cpu(entry->type)); + return -EIO; + } + + if (fd->entrylength != expected_size) { + pr_err("catalog record size mismatch (type %d, got %u, expected %u)\n", + be16_to_cpu(entry->type), fd->entrylength, expected_size); + return -EIO; + } + + return 0; +} diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 02c1eee4a4b86..6c8380f7208df 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -194,12 +194,12 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, int hfsplus_find_cat(struct super_block *sb, u32 cnid, struct hfs_find_data *fd) { - hfsplus_cat_entry tmp; + hfsplus_cat_entry tmp = {0}; int err; u16 type; 
hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid); - err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry)); + err = hfsplus_brec_read_cat(fd, &tmp); if (err) return err; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index ca5f74a140ec1..8aeb861969d37 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -49,7 +49,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, if (unlikely(err < 0)) goto fail; again: - err = hfs_brec_read(&fd, &entry, sizeof(entry)); + err = hfsplus_brec_read_cat(&fd, &entry); if (err) { if (err == -ENOENT) { hfs_find_exit(&fd); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index de801942ae471..d0eb2a4e06b44 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -507,6 +507,15 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, blk_opf_t opf); int hfsplus_read_wrapper(struct super_block *sb); +static inline u32 hfsplus_cat_thread_size(const struct hfsplus_cat_thread *thread) +{ + return offsetof(struct hfsplus_cat_thread, nodeName) + + offsetof(struct hfsplus_unistr, unicode) + + be16_to_cpu(thread->nodeName.length) * sizeof(hfsplus_unichr); +} + +int hfsplus_brec_read_cat(struct hfs_find_data *fd, hfsplus_cat_entry *entry); + /* * time helpers: convert between 1904-base and 1970-base timestamps * diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5230d368bd4f2..59ea956c72018 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -571,7 +571,7 @@ static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc) err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (unlikely(err < 0)) goto out_put_root; - if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { + if (!hfsplus_brec_read_cat(&fd, &entry)) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) { err = -EIO; From d309d3308de658d87c42d97e044c89a226327526 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 8 May 
2026 20:35:34 -0400 Subject: [PATCH 544/554] hfsplus: fix held lock freed on hfsplus_fill_super() [ Upstream commit 90c500e4fd83fa33c09bc7ee23b6d9cc487ac733 ] hfsplus_fill_super() calls hfs_find_init() to initialize a search structure, which acquires tree->tree_lock. If the subsequent call to hfsplus_cat_build_key() fails, the function jumps to the out_put_root error label without releasing the lock. The later cleanup path then frees the tree data structure with the lock still held, triggering a held lock freed warning. Fix this by adding the missing hfs_find_exit(&fd) call before jumping to the out_put_root error label. This ensures that tree->tree_lock is properly released on the error path. The bug was originally detected on v6.13-rc1 using an experimental static analysis tool we are developing, and we have verified that the issue persists in the latest mainline kernel. The tool is specifically designed to detect memory management issues. It is currently under active development and not yet publicly available. We confirmed the bug by runtime testing under QEMU with x86_64 defconfig, lockdep enabled, and CONFIG_HFSPLUS_FS=y. To trigger the error path, we used GDB to dynamically shrink the max_unistr_len parameter to 1 before hfsplus_asc2uni() is called. This forces hfsplus_asc2uni() to naturally return -ENAMETOOLONG, which propagates to hfsplus_cat_build_key() and exercises the faulty error path. The following warning was observed during mount: ========================= WARNING: held lock freed! 7.0.0-rc3-00016-gb4f0dd314b39 #4 Not tainted ------------------------- mount/174 is freeing memory ffff888103f92000-ffff888103f92fff, with a lock still held there! 
ffff888103f920b0 (&tree->tree_lock){+.+.}-{4:4}, at: hfsplus_find_init+0x154/0x1e0 2 locks held by mount/174: #0: ffff888103f960e0 (&type->s_umount_key#42/1){+.+.}-{4:4}, at: alloc_super.constprop.0+0x167/0xa40 #1: ffff888103f920b0 (&tree->tree_lock){+.+.}-{4:4}, at: hfsplus_find_init+0x154/0x1e0 stack backtrace: CPU: 2 UID: 0 PID: 174 Comm: mount Not tainted 7.0.0-rc3-00016-gb4f0dd314b39 #4 PREEMPT(lazy) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x82/0xd0 debug_check_no_locks_freed+0x13a/0x180 kfree+0x16b/0x510 ? hfsplus_fill_super+0xcb4/0x18a0 hfsplus_fill_super+0xcb4/0x18a0 ? __pfx_hfsplus_fill_super+0x10/0x10 ? srso_return_thunk+0x5/0x5f ? bdev_open+0x65f/0xc30 ? srso_return_thunk+0x5/0x5f ? pointer+0x4ce/0xbf0 ? trace_contention_end+0x11c/0x150 ? __pfx_pointer+0x10/0x10 ? srso_return_thunk+0x5/0x5f ? bdev_open+0x79b/0xc30 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? vsnprintf+0x6da/0x1270 ? srso_return_thunk+0x5/0x5f ? __mutex_unlock_slowpath+0x157/0x740 ? __pfx_vsnprintf+0x10/0x10 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? mark_held_locks+0x49/0x80 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? irqentry_exit+0x17b/0x5e0 ? trace_irq_disable.constprop.0+0x116/0x150 ? __pfx_hfsplus_fill_super+0x10/0x10 ? __pfx_hfsplus_fill_super+0x10/0x10 get_tree_bdev_flags+0x302/0x580 ? __pfx_get_tree_bdev_flags+0x10/0x10 ? vfs_parse_fs_qstr+0x129/0x1a0 ? __pfx_vfs_parse_fs_qstr+0x3/0x10 vfs_get_tree+0x89/0x320 fc_mount+0x10/0x1d0 path_mount+0x5c5/0x21c0 ? __pfx_path_mount+0x10/0x10 ? trace_irq_enable.constprop.0+0x116/0x150 ? trace_irq_enable.constprop.0+0x116/0x150 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? kmem_cache_free+0x307/0x540 ? user_path_at+0x51/0x60 ? __x64_sys_mount+0x212/0x280 ? srso_return_thunk+0x5/0x5f __x64_sys_mount+0x212/0x280 ? __pfx___x64_sys_mount+0x10/0x10 ? srso_return_thunk+0x5/0x5f ? trace_irq_enable.constprop.0+0x116/0x150 ? 
srso_return_thunk+0x5/0x5f do_syscall_64+0x111/0x680 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ffacad55eae Code: 48 8b 0d 85 1f 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 8 RSP: 002b:00007fff1ab55718 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffacad55eae RDX: 000055740c64e5b0 RSI: 000055740c64e630 RDI: 000055740c651ab0 RBP: 000055740c64e380 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000055740c64e5b0 R14: 000055740c651ab0 R15: 000055740c64e380 After applying this patch, the warning no longer appears. Fixes: 89ac9b4d3d1a ("hfsplus: fix longname handling") CC: stable@vger.kernel.org Signed-off-by: Zilin Guan Reviewed-by: Viacheslav Dubeyko Tested-by: Viacheslav Dubeyko Signed-off-by: Viacheslav Dubeyko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 59ea956c72018..e1e2833f528d1 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -569,8 +569,10 @@ static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto out_put_root; err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + hfs_find_exit(&fd); goto out_put_root; + } if (!hfsplus_brec_read_cat(&fd, &entry)) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) { From 24da4cc6bf27bca15770bcbb15a84adb550fafd6 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 8 May 2026 23:22:28 -0400 Subject: [PATCH 545/554] erofs: tidy up z_erofs_lz4_handle_overlap() [ Upstream commit 9ae77198d4815c63fc8ebacc659c71d150d1e51b ] - Add some useful comments to explain inplace I/Os and decompression; - Rearrange the code to get rid of one unnecessary goto. 
Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Stable-dep-of: 21e161de2dc6 ("erofs: fix unsigned underflow in z_erofs_lz4_handle_overlap()") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/erofs/decompressor.c | 85 ++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 354762c9723f9..2f4cef67cf640 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -105,44 +105,58 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, return kaddr ? 1 : 0; } -static void *z_erofs_lz4_handle_overlap(struct z_erofs_decompress_req *rq, +static void *z_erofs_lz4_handle_overlap(const struct z_erofs_decompress_req *rq, void *inpage, void *out, unsigned int *inputmargin, int *maptype, bool may_inplace) { - unsigned int oend, omargin, total, i; + unsigned int oend, omargin, cnt, i; struct page **in; - void *src, *tmp; - - if (rq->inplace_io) { - oend = rq->pageofs_out + rq->outputsize; - omargin = PAGE_ALIGN(oend) - oend; - if (rq->partial_decoding || !may_inplace || - omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) - goto docopy; + void *src; + /* + * If in-place I/O isn't used, for example, the bounce compressed cache + * can hold data for incomplete read requests. Just map the compressed + * buffer as well and decompress directly. + */ + if (!rq->inplace_io) { + if (rq->inpages <= 1) { + *maptype = 0; + return inpage; + } + kunmap_local(inpage); + src = erofs_vm_map_ram(rq->in, rq->inpages); + if (!src) + return ERR_PTR(-ENOMEM); + *maptype = 1; + return src; + } + /* + * Then, deal with in-place I/Os. 
The reasons why in-place I/O is useful + * are: (1) It minimizes memory footprint during the I/O submission, + * which is useful for slow storage (including network devices and + * low-end HDDs/eMMCs) but with a lot inflight I/Os; (2) If in-place + * decompression can also be applied, it will reuse the unique buffer so + * that no extra CPU D-cache is polluted with temporary compressed data + * for extreme performance. + */ + oend = rq->pageofs_out + rq->outputsize; + omargin = PAGE_ALIGN(oend) - oend; + if (!rq->partial_decoding && may_inplace && + omargin >= LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) { for (i = 0; i < rq->inpages; ++i) if (rq->out[rq->outpages - rq->inpages + i] != rq->in[i]) - goto docopy; - kunmap_local(inpage); - *maptype = 3; - return out + ((rq->outpages - rq->inpages) << PAGE_SHIFT); - } - - if (rq->inpages <= 1) { - *maptype = 0; - return inpage; + break; + if (i >= rq->inpages) { + kunmap_local(inpage); + *maptype = 3; + return out + ((rq->outpages - rq->inpages) << PAGE_SHIFT); + } } - kunmap_local(inpage); - src = erofs_vm_map_ram(rq->in, rq->inpages); - if (!src) - return ERR_PTR(-ENOMEM); - *maptype = 1; - return src; - -docopy: - /* Or copy compressed data which can be overlapped to per-CPU buffer */ - in = rq->in; + /* + * If in-place decompression can't be applied, copy compressed data that + * may potentially overlap during decompression to a per-CPU buffer. 
+ */ src = z_erofs_get_gbuf(rq->inpages); if (!src) { DBG_BUGON(1); @@ -150,20 +164,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_decompress_req *rq, return ERR_PTR(-EFAULT); } - tmp = src; - total = rq->inputsize; - while (total) { - unsigned int page_copycnt = - min_t(unsigned int, total, PAGE_SIZE - *inputmargin); - + for (i = 0, in = rq->in; i < rq->inputsize; i += cnt, ++in) { + cnt = min_t(u32, rq->inputsize - i, PAGE_SIZE - *inputmargin); if (!inpage) inpage = kmap_local_page(*in); - memcpy(tmp, inpage + *inputmargin, page_copycnt); + memcpy(src + i, inpage + *inputmargin, cnt); kunmap_local(inpage); inpage = NULL; - tmp += page_copycnt; - total -= page_copycnt; - ++in; *inputmargin = 0; } *maptype = 2; From c9ce18e6bb2c467ec85756dc7989b547b7584fee Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Fri, 8 May 2026 23:22:29 -0400 Subject: [PATCH 546/554] erofs: fix unsigned underflow in z_erofs_lz4_handle_overlap() [ Upstream commit 21e161de2dc660b1bb70ef5b156ab8e6e1cca3ab ] Some crafted images can have illegal (!partial_decoding && m_llen < m_plen) extents, and the LZ4 inplace decompression path can be wrongly hit, but it cannot handle (outpages < inpages) properly: "outpages - inpages" wraps to a large value and the subsequent rq->out[] access reads past the decompressed_pages array. However, such crafted cases can correctly result in a corruption report in the normal LZ4 non-inplace path. Let's add an additional check to fix this for backporting. 
Reproducible image (base64-encoded gzipped blob): H4sIAJGR12kCA+3SPUoDQRgG4MkmkkZk8QRbRFIIi9hbpEjrHQI5ghfwCN5BLCzTGtLbBI+g dilSJo1CnIm7GEXFxhT6PDDwfrs73/ywIQD/1ePD4r7Ou6ETsrq4mu7XcWfj++Pb58nJU/9i PNtbjhan04/9GtX4qVYc814WDqt6FaX5s+ZwXXeq52lndT6IuVvlblytLMvh4Gzwaf90nsvz 2DF/21+20T/ldgp5s1jXRaN4t/8izsy/OUB6e/Qa79r+JwAAAAAAAL52vQVuGQAAAP6+my1w ywAAAAAAAADwu14ATsEYtgBQAAA= $ mount -t erofs -o cache_strategy=disabled foo.erofs /mnt $ dd if=/mnt/data of=/dev/null bs=4096 count=1 Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/erofs/decompressor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 2f4cef67cf640..e4e59a4e0d90d 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -142,6 +142,7 @@ static void *z_erofs_lz4_handle_overlap(const struct z_erofs_decompress_req *rq, oend = rq->pageofs_out + rq->outputsize; omargin = PAGE_ALIGN(oend) - oend; if (!rq->partial_decoding && may_inplace && + rq->outpages >= rq->inpages && omargin >= LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) { for (i = 0; i < rq->inpages; ++i) if (rq->out[rq->outpages - rq->inpages + i] != From ef63ef44e86c64a3c1260c2b67b122d9f82e315e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 9 May 2026 17:29:56 -0400 Subject: [PATCH 547/554] printk: add print_hex_dump_devel() [ Upstream commit d134feeb5df33fbf77f482f52a366a44642dba09 ] Add print_hex_dump_devel() as the hex dump equivalent of pr_devel(), which emits output only when DEBUG is enabled, but keeps call sites compiled otherwise. 
Suggested-by: Herbert Xu Signed-off-by: Thorsten Blum Reviewed-by: John Ogness Signed-off-by: Herbert Xu Stable-dep-of: 177730a273b1 ("crypto: caam - guard HMAC key hex dumps in hash_digest_key") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/printk.h b/include/linux/printk.h index 45c663124c9bd..9a8eaed5f8778 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -802,6 +802,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, } #endif +#if defined(DEBUG) +#define print_hex_dump_devel(prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) \ + print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ + groupsize, buf, len, ascii) +#else +static inline void print_hex_dump_devel(const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ +} +#endif + /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; From 5cffe3c136891aa4d579bf5c079a68f7cb371b0c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 9 May 2026 17:29:57 -0400 Subject: [PATCH 548/554] crypto: caam - guard HMAC key hex dumps in hash_digest_key [ Upstream commit 177730a273b18e195263ed953853273e901b5064 ] Use print_hex_dump_devel() for dumping sensitive HMAC key bytes in hash_digest_key() to avoid leaking secrets at runtime when CONFIG_DYNAMIC_DEBUG is enabled. 
Fixes: 045e36780f11 ("crypto: caam - ahash hmac support") Fixes: 3f16f6c9d632 ("crypto: caam/qi2 - add support for ahash algorithms") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg_qi2.c | 4 ++-- drivers/crypto/caam/caamhash.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 07665494c8758..c97c10cdf207a 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -3269,7 +3269,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, dpaa2_fl_set_addr(out_fle, key_dma); dpaa2_fl_set_len(out_fle, digestsize); - print_hex_dump_debug("key_in@" __stringify(__LINE__)": ", + print_hex_dump_devel("key_in@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, *keylen, 1); print_hex_dump_debug("shdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), @@ -3289,7 +3289,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, /* in progress */ wait_for_completion(&result.completion); ret = result.err; - print_hex_dump_debug("digested key@" __stringify(__LINE__)": ", + print_hex_dump_devel("digested key@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, digestsize, 1); } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 053af748be86d..cc942e5ab2799 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -393,7 +393,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | LDST_SRCDST_BYTE_CONTEXT); - print_hex_dump_debug("key_in@"__stringify(__LINE__)": ", + print_hex_dump_devel("key_in@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, *keylen, 1); 
print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), @@ -408,7 +408,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, wait_for_completion(&result.completion); ret = result.err; - print_hex_dump_debug("digested key@"__stringify(__LINE__)": ", + print_hex_dump_devel("digested key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, digestsize, 1); } From 9d1774b33e1083012cc5f938c6428dae935b0607 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 10 May 2026 09:54:15 -0400 Subject: [PATCH 549/554] net: stmmac: rename STMMAC_GET_ENTRY() -> STMMAC_NEXT_ENTRY() [ Upstream commit 6b4286e0550814cdc4b897f881ec1fa8b0313227 ] STMMAC_GET_ENTRY() doesn't describe what this macro is doing - it is incrementing the provided index for the circular array of descriptors. Replace "GET" with "NEXT" as this better describes the action here. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vba-0000000DbWo-1oL5@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski Stable-dep-of: 0bb05e6adfa9 ("net: stmmac: Prevent NULL deref when RX memory exhausted") Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/chain_mode.c | 2 +- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- .../net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 26 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 1c01e3c640cee..2515608878235 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -47,7 +47,7 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, while (len != 0) { tx_q->tx_skbuff[entry] = NULL; - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = 
STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); desc = tx_q->dma_tx + entry; if (len > bmax) { diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 9e012720a69fa..acd7719506b61 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -60,7 +60,7 @@ static inline bool dwmac_is_xmac(enum dwmac_core_type core_type) #define DMA_MIN_RX_SIZE 64 #define DMA_MAX_RX_SIZE 1024 #define DMA_DEFAULT_RX_SIZE 512 -#define STMMAC_GET_ENTRY(x, size) ((x + 1) & (size - 1)) +#define STMMAC_NEXT_ENTRY(x, size) ((x + 1) & (size - 1)) #undef FRAME_FILTER_DEBUG /* #define FRAME_FILTER_DEBUG */ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index d218412ca832f..45c14c1bb0eaa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); if (priv->extend_desc) desc = (struct dma_desc *)(tx_q->dma_etx + entry); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bd9aaab7f1712..7bccb5b0a84d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2609,7 +2609,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) xsk_tx_metadata_to_compl(meta, &tx_q->tx_skbuff_dma[entry].xsk_meta); - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); entry = tx_q->cur_tx; } 
u64_stats_update_begin(&txq_stats->napi_syncp); @@ -2780,7 +2780,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, stmmac_release_tx_desc(priv, p, priv->mode); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); } tx_q->dirty_tx = entry; @@ -4079,7 +4079,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, return false; stmmac_set_tx_owner(priv, p); - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); return true; } @@ -4107,7 +4107,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, while (tmp_len > 0) { dma_addr_t curr_addr; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); @@ -4258,7 +4258,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); } @@ -4362,7 +4362,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. 
*/ - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) { netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n", @@ -4566,7 +4566,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int len = skb_frag_size(frag); bool last_segment = (i == (nfrags - 1)); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[entry]); if (likely(priv->extend_desc)) @@ -4636,7 +4636,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. */ - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { @@ -4805,7 +4805,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); stmmac_set_rx_owner(priv, p, use_rx_wd); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_rx_size); } rx_q->dirty_rx = entry; rx_q->rx_tail_addr = rx_q->dma_rx_phy + @@ -4953,7 +4953,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); tx_q->cur_tx = entry; return STMMAC_XDP_TX; @@ -5187,7 +5187,7 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) dma_wmb(); stmmac_set_rx_owner(priv, rx_desc, use_rx_wd); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); + entry = STMMAC_NEXT_ENTRY(entry, 
priv->dma_conf.dma_rx_size); } if (rx_desc) { @@ -5282,7 +5282,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) break; /* Prefetch the next RX descriptor */ - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; @@ -5478,7 +5478,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; From 4af2e62cbcda575a174acd230c3f3a208135e16d Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Sun, 10 May 2026 09:54:16 -0400 Subject: [PATCH 550/554] net: stmmac: Prevent NULL deref when RX memory exhausted [ Upstream commit 0bb05e6adfa99a2ea1fee1125cc0953409f83ed8 ] The CPU receives frames from the MAC through conventional DMA: the CPU allocates buffers for the MAC, then the MAC fills them and returns ownership to the CPU. For each hardware RX queue, the CPU and MAC coordinate through a shared ring array of DMA descriptors: one descriptor per DMA buffer. Each descriptor includes the buffer's physical address and a status flag ("OWN") indicating which side owns the buffer: OWN=0 for CPU, OWN=1 for MAC. The CPU is only allowed to set the flag and the MAC is only allowed to clear it, and both must move through the ring in sequence: thus the ring is used for both "submissions" and "completions." In the stmmac driver, stmmac_rx() bookmarks its position in the ring with the `cur_rx` index. The main receive loop in that function checks for rx_descs[cur_rx].own=0, gives the corresponding buffer to the network stack (NULLing the pointer), and increments `cur_rx` modulo the ring size. After the loop exits, stmmac_rx_refill(), which bookmarks its position with `dirty_rx`, allocates fresh buffers and rearms the descriptors (setting OWN=1). 
If it fails any allocation, it simply stops early (leaving OWN=0) and will retry where it left off when next called. This means descriptors have a three-stage lifecycle (terms my own): - `empty` (OWN=1, buffer valid) - `full` (OWN=0, buffer valid and populated) - `dirty` (OWN=0, buffer NULL) But because stmmac_rx() only checks OWN, it confuses `full`/`dirty`. In the past (see 'Fixes:'), there was a bug where the loop could cycle `cur_rx` all the way back to the first descriptor it dirtied, resulting in a NULL dereference when mistaken for `full`. The aforementioned commit resolved that *specific* failure by capping the loop's iteration limit at `dma_rx_size - 1`, but this is only a partial fix: if the previous stmmac_rx_refill() didn't complete, then there are leftover `dirty` descriptors that the loop might encounter without needing to cycle fully around. The current code therefore panics (see 'Closes:') when stmmac_rx_refill() is memory-starved long enough for `cur_rx` to catch up to `dirty_rx`. Fix this by explicitly checking, before advancing `cur_rx`, if the next entry is dirty; exit the loop if so. This prevents processing of the final, used descriptor until stmmac_rx_refill() succeeds, but fully prevents the `cur_rx == dirty_rx` ambiguity as the previous bugfix intended: so remove the clamp as well. Since stmmac_rx_zc() is a copy-paste-and-tweak of stmmac_rx() and the code structure is identical, any fix to stmmac_rx() will also need a corresponding fix for stmmac_rx_zc(). Therefore, apply the same check there. In stmmac_rx() (not stmmac_rx_zc()), a related bug remains: after the MAC sets OWN=0 on the final descriptor, it will be unable to send any further DMA-complete IRQs until it's given more `empty` descriptors. Currently, the driver simply *hopes* that the next stmmac_rx_refill() succeeds, risking an indefinite stall of the receive process if not. But this is not a regression, so it can be addressed in a future change. 
Fixes: b6cb4541853c7 ("net: stmmac: avoid rx queue overrun") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221010 Cc: stable@vger.kernel.org Suggested-by: Russell King Signed-off-by: Sam Edwards Link: https://patch.msgid.link/20260422044503.5349-1-CFSworks@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7bccb5b0a84d3..41b270a486308 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5282,9 +5282,12 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) break; /* Prefetch the next RX descriptor */ - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; if (priv->extend_desc) np = (struct dma_desc *)(rx_q->dma_erx + next_entry); @@ -5422,7 +5425,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); bufsz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; - limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head; @@ -5478,9 +5480,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; if (priv->extend_desc) 
np = (struct dma_desc *)(rx_q->dma_erx + next_entry); From a0e560d15ff1ce403a0eda17f25a1ee0c086794c Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Tue, 12 May 2026 16:02:57 +0100 Subject: [PATCH 551/554] rust: pin-init: fix incorrect accessor reference lifetime commit 68bf102226cf2199dc609b67c1e847cad4de4b57 upstream When a field has been initialized, `init!`/`pin_init!` create a reference or pinned reference to the field so it can be accessed later during the initialization of other fields. However, the reference it created is incorrectly `&'static` rather than just the scope of the initializer. This means that you can do init!(Foo { a: 1, _: { let b: &'static u32 = a; } }) which is unsound. This is caused by `&mut (*$slot).$ident`, which actually allows arbitrary lifetime, so this is effectively `'static`. Fix it by adding `let_binding` method on `DropGuard` to shorten lifetime. This results in exactly what we want for these accessors. The safety and invariant comments of `DropGuard` have been reworked; instead of reasoning about what caller can do with the guard, express it in a way that the ownership is transferred to the guard and `forget` takes it back, so the unsafe operations within the `DropGuard` can be more easily justified. Assisted-by: Claude:claude-3-opus Signed-off-by: Gary Guo Signed-off-by: Greg Kroah-Hartman --- rust/pin-init/src/__internal.rs | 28 ++++++---- rust/pin-init/src/macros.rs | 91 +++++++++++++++++++-------------- 2 files changed, 73 insertions(+), 46 deletions(-) diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 90f18e9a2912c..83cc7ca2ead34 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs @@ -218,32 +218,42 @@ fn stack_init_reuse() { /// When a value of this type is dropped, it drops a `T`. /// /// Can be forgotten to prevent the drop. +/// +/// # Invariants +/// +/// - `ptr` is valid and properly aligned. +/// - `*ptr` is initialized and owned by this guard. 
pub struct DropGuard { ptr: *mut T, } impl DropGuard { - /// Creates a new [`DropGuard`]. It will [`ptr::drop_in_place`] `ptr` when it gets dropped. + /// Creates a drop guard and transfer the ownership of the pointer content. /// - /// # Safety + /// The ownership is only relinquished if the guard is forgotten via [`core::mem::forget`]. /// - /// `ptr` must be a valid pointer. + /// # Safety /// - /// It is the callers responsibility that `self` will only get dropped if the pointee of `ptr`: - /// - has not been dropped, - /// - is not accessible by any other means, - /// - will not be dropped by any other means. + /// - `ptr` is valid and properly aligned. + /// - `*ptr` is initialized, and the ownership is transferred to this guard. #[inline] pub unsafe fn new(ptr: *mut T) -> Self { + // INVARIANT: By safety requirement. Self { ptr } } + + /// Create a let binding for accessor use. + #[inline] + pub fn let_binding(&mut self) -> &mut T { + // SAFETY: Per type invariant. + unsafe { &mut *self.ptr } + } } impl Drop for DropGuard { #[inline] fn drop(&mut self) { - // SAFETY: A `DropGuard` can only be constructed using the unsafe `new` function - // ensuring that this operation is safe. + // SAFETY: `self.ptr` is valid, properly aligned and `*self.ptr` is owned by this guard. unsafe { ptr::drop_in_place(self.ptr) } } } diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index fdf38b4fdbdc6..a65ddf6cc8733 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -1310,27 +1310,33 @@ macro_rules! __init_internal { // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; - // NOTE: the field accessor ensures that the initialized field is properly aligned. + // NOTE: this ensures that the initialized field is properly aligned. // Unaligned fields will cause the compiler to emit E0793. 
We do not support // unaligned fields since `Init::__init` requires an aligned pointer; the call to // `ptr::write` below has the same requirement. - // SAFETY: - // - the project function does the correct field projection, - // - the field has been initialized, - // - the reference is only valid until the end of the initializer. - #[allow(unused_variables)] - let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + // SAFETY: the field has been initialized. + let _ = unsafe { &mut (*$slot).$field }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. // We use `paste!` to create new hygiene for `$field`. $crate::macros::paste! { - // SAFETY: We forget the guard later when initialization has succeeded. - let [< __ $field _guard >] = unsafe { + // SAFETY: + // - `addr_of_mut!((*$slot).$field)` is valid. + // - `(*$slot).$field` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut [< __ $field _guard >] = unsafe { $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; + // NOTE: The reference is derived from the guard so that it only lives as long as + // the guard does and cannot escape the scope. + #[allow(unused_variables)] + // SAFETY: the project function does the correct field projection. + let $field = unsafe { $data.[< __project_ $field >]([< __ $field _guard >].let_binding()) }; + $crate::__init_internal!(init_slot($use_data): @data($data), @slot($slot), @@ -1353,27 +1359,30 @@ macro_rules! __init_internal { // return when an error/panic occurs. unsafe { $crate::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; - // NOTE: the field accessor ensures that the initialized field is properly aligned. + // NOTE: this ensures that the initialized field is properly aligned. 
// Unaligned fields will cause the compiler to emit E0793. We do not support // unaligned fields since `Init::__init` requires an aligned pointer; the call to // `ptr::write` below has the same requirement. - // SAFETY: - // - the field is not structurally pinned, since the line above must compile, - // - the field has been initialized, - // - the reference is only valid until the end of the initializer. - #[allow(unused_variables)] - let $field = unsafe { &mut (*$slot).$field }; + // SAFETY: the field has been initialized. + let _ = unsafe { &mut (*$slot).$field }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. // We use `paste!` to create new hygiene for `$field`. $crate::macros::paste! { - // SAFETY: We forget the guard later when initialization has succeeded. - let [< __ $field _guard >] = unsafe { + // SAFETY: + // - `addr_of_mut!((*$slot).$field)` is valid. + // - `(*$slot).$field` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut [< __ $field _guard >] = unsafe { $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; + #[allow(unused_variables)] + let $field = [< __ $field _guard >].let_binding(); + $crate::__init_internal!(init_slot(): @data($data), @slot($slot), @@ -1397,28 +1406,30 @@ macro_rules! __init_internal { unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; } - // NOTE: the field accessor ensures that the initialized field is properly aligned. + // NOTE: this ensures that the initialized field is properly aligned. // Unaligned fields will cause the compiler to emit E0793. We do not support // unaligned fields since `Init::__init` requires an aligned pointer; the call to // `ptr::write` below has the same requirement. 
- #[allow(unused_variables)] - // SAFETY: - // - the field is not structurally pinned, since no `use_data` was required to create this - // initializer, - // - the field has been initialized, - // - the reference is only valid until the end of the initializer. - let $field = unsafe { &mut (*$slot).$field }; + // SAFETY: the field has been initialized. + let _ = unsafe { &mut (*$slot).$field }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. // We use `paste!` to create new hygiene for `$field`. $crate::macros::paste! { - // SAFETY: We forget the guard later when initialization has succeeded. - let [< __ $field _guard >] = unsafe { + // SAFETY: + // - `addr_of_mut!((*$slot).$field)` is valid. + // - `(*$slot).$field` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut [< __ $field _guard >] = unsafe { $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; + #[allow(unused_variables)] + let $field = [< __ $field _guard >].let_binding(); + $crate::__init_internal!(init_slot(): @data($data), @slot($slot), @@ -1441,27 +1452,33 @@ macro_rules! __init_internal { // SAFETY: The memory at `slot` is uninitialized. unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; } - // NOTE: the field accessor ensures that the initialized field is properly aligned. + // NOTE: this ensures that the initialized field is properly aligned. // Unaligned fields will cause the compiler to emit E0793. We do not support // unaligned fields since `Init::__init` requires an aligned pointer; the call to // `ptr::write` below has the same requirement. - // SAFETY: - // - the project function does the correct field projection, - // - the field has been initialized, - // - the reference is only valid until the end of the initializer. 
- #[allow(unused_variables)] - let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + // SAFETY: the field has been initialized. + let _ = unsafe { &mut (*$slot).$field }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. // We use `paste!` to create new hygiene for `$field`. $crate::macros::paste! { - // SAFETY: We forget the guard later when initialization has succeeded. - let [< __ $field _guard >] = unsafe { + // SAFETY: + // - `addr_of_mut!((*$slot).$field)` is valid. + // - `(*$slot).$field` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut [< __ $field _guard >] = unsafe { $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; + // NOTE: The reference is derived from the guard so that it only lives as long as + // the guard does and cannot escape the scope. + #[allow(unused_variables)] + // SAFETY: the project function does the correct field projection. + let $field = unsafe { $data.[< __project_ $field >]([< __ $field _guard >].let_binding()) }; + $crate::__init_internal!(init_slot($use_data): @data($data), @slot($slot), From 28f5ed477eef166d678d6966762cbc1de9b4f436 Mon Sep 17 00:00:00 2001 From: Prathyushi Nangia Date: Tue, 9 Dec 2025 10:01:33 -0600 Subject: [PATCH 552/554] x86/CPU/AMD: Prevent improper isolation of shared resources in Zen2's op cache commit c21b90f77687075115d989e53a8ec5e2bb427ab1 upstream. Make sure resources are not improperly shared in the op cache, which could otherwise cause instruction corruption. 
Signed-off-by: Prathyushi Nangia Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 3 ++- arch/x86/kernel/cpu/amd.c | 3 +++ tools/arch/x86/include/asm/msr-index.h | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 66ddea295c266..d0a0cc8e8bd99 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -767,9 +767,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ec965de4abec9..138ff22a4926a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -994,6 +994,9 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) /* Correct misconfigured CPUID on some clients. 
*/ clear_cpu_cap(c, X86_FEATURE_INVLPGB); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN2_BP_CFG_BUG_FIX_BIT); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 9e1720d73244f..7de315d8b2460 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -761,9 +761,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800 From 1aa60fea7f637c071f529ad6784aecca2f2f0c5f Mon Sep 17 00:00:00 2001 From: Shota Zaizen Date: Tue, 28 Apr 2026 19:02:55 +0900 Subject: [PATCH 553/554] ksmbd: validate inherited ACE SID length commit 996454bc0da84d5a1dedb1a7861823087e01a7ae upstream. smb_inherit_dacl() walks the parent directory DACL loaded from the security descriptor xattr. It verifies that each ACE contains the fixed SID header before using it, but does not verify that the variable-length SID described by sid.num_subauth is fully contained in the ACE. A malformed inheritable ACE can advertise more subauthorities than are present in the ACE. compare_sids() may then read past the ACE. smb_set_ace() also clamps the copied destination SID, but used the unchecked source SID count to compute the inherited ACE size. That could advance the temporary inherited ACE buffer pointer and nt_size accounting past the allocated buffer. Fix this by validating the parent ACE SID count and SID length before using the SID during inheritance. Compute the inherited ACE size from the copied SID so the size matches the bounded destination SID. Reject the inherited DACL if size accumulation would overflow smb_acl.size or the security descriptor allocation size. 
Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Shota Zaizen Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smbacl.c | 66 +++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index eae9daeb0a414..a0e0dc56c7300 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1068,7 +1068,26 @@ static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type, ace->flags = flags; ace->access_req = access_req; smb_copy_sid(&ace->sid, sid); - ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + (sid->num_subauth * 4)); + ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + + (ace->sid.num_subauth * 4)); +} + +static int smb_append_inherited_ace(struct smb_ace **ace, int *nt_size, + u16 *ace_cnt, const struct smb_sid *sid, + u8 type, u8 flags, __le32 access_req) +{ + int ace_size; + + smb_set_ace(*ace, sid, type, flags, access_req); + ace_size = le16_to_cpu((*ace)->size); + /* pdacl->size is __le16 and includes struct smb_acl. 
*/ + if (check_add_overflow(*nt_size, ace_size, nt_size) || + *nt_size > U16_MAX - (int)sizeof(struct smb_acl)) + return -EINVAL; + + (*ace_cnt)++; + *ace = (struct smb_ace *)((char *)*ace + ace_size); + return 0; } int smb_inherit_dacl(struct ksmbd_conn *conn, @@ -1157,6 +1176,12 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, CIFS_SID_BASE_SIZE) break; + if (parent_aces->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || + pace_size < offsetof(struct smb_ace, sid) + + CIFS_SID_BASE_SIZE + + sizeof(__le32) * parent_aces->sid.num_subauth) + break; + aces_size -= pace_size; flags = parent_aces->flags; @@ -1186,22 +1211,24 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, } if (is_dir && creator && flags & CONTAINER_INHERIT_ACE) { - smb_set_ace(aces, psid, parent_aces->type, inherited_flags, - parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - ace_cnt++; - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, + psid, parent_aces->type, + inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; flags |= INHERIT_ONLY_ACE; psid = creator; } else if (is_dir && !(parent_aces->flags & NO_PROPAGATE_INHERIT_ACE)) { psid = &parent_aces->sid; } - smb_set_ace(aces, psid, parent_aces->type, flags | inherited_flags, - parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); - ace_cnt++; + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, psid, + parent_aces->type, + flags | inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; pass: parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); } @@ -1211,7 +1238,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; - int pntsd_alloc_size; + size_t pntsd_alloc_size; if 
(parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1224,8 +1251,19 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + if (check_add_overflow(sizeof(struct smb_ntsd), + (size_t)powner_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, + (size_t)pgroup_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, sizeof(struct smb_acl), + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, (size_t)nt_size, + &pntsd_alloc_size)) { + rc = -EINVAL; + goto free_aces_base; + } pntsd = kzalloc(pntsd_alloc_size, KSMBD_DEFAULT_GFP); if (!pntsd) { From 52dc660c85dbab60c07c8f1a716d68a4b7d4fc7e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 May 2026 15:30:26 +0200 Subject: [PATCH 554/554] Linux 6.18.30 Link: https://lore.kernel.org/r/20260512173938.452574370@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Peter Schneider Tested-by: Brett A C Sheffield Tested-by: Mark Brown Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20260513153744.746440810@linuxfoundation.org Tested-by: Brett A C Sheffield Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Mark Brown Tested-by: Barry K. Nathan Tested-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 65e97b7e2aa84..f4706ffa9b1ad 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 29 +SUBLEVEL = 30 EXTRAVERSION = NAME = Baby Opossum Posse