From 249d2baf9b494381f2ff4f21feeaaa47eaa78e7c Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 Jun 2016 14:06:14 -0700 Subject: [PATCH 001/433] ANDROID: dm verity fec: limit error correction recursion If verity tree itself is sufficiently corrupted in addition to data blocks, it's possible for error correction to end up in a deep recursive error correction loop that eventually causes a kernel panic as follows: [ 14.728962] [] verity_fec_decode+0xa8/0x138 [ 14.734691] [] verity_verify_level+0x11c/0x180 [ 14.740681] [] verity_hash_for_block+0x88/0xe0 [ 14.746671] [] fec_decode_rsb+0x318/0x75c [ 14.752226] [] verity_fec_decode+0xa8/0x138 [ 14.757956] [] verity_verify_level+0x11c/0x180 [ 14.763944] [] verity_hash_for_block+0x88/0xe0 This change limits the recursion to a reasonable level during a single I/O operation. Bug: 28943429 Signed-off-by: Sami Tolvanen Change-Id: I0a7ebff331d259c59a5e03c81918cc1613c3a766 (cherry picked from commit f4b9e40597e73942d2286a73463c55f26f61bfa7) --- drivers/md/dm-verity-fec.c | 11 ++++++++++- drivers/md/dm-verity-fec.h | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index ad10d6d8ed28dd..b26809a47ca37d 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -442,6 +442,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (!verity_fec_is_enabled(v)) return -EOPNOTSUPP; + if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) { + DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name); + return -EIO; + } + + fio->level++; + if (type == DM_VERITY_BLOCK_TYPE_METADATA) block += v->data_blocks; @@ -475,7 +482,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (r < 0) { r = fec_decode_rsb(v, io, fio, rsb, offset, true); if (r < 0) - return r; + goto done; } if (dest) @@ -485,6 +492,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, r = verity_for_bv_block(v, io, iter, fec_bv_copy); } +done: + fio->level--; return r; } diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 8c4bee052a7354..b8e21cef3ad193 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -28,6 +28,9 @@ #define DM_VERITY_FEC_BUF_MAX \ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) +/* maximum recursion level for verity_fec_decode */ +#define DM_VERITY_FEC_MAX_RECURSION 4 + #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" #define DM_VERITY_OPT_FEC_START "fec_start" @@ -61,6 +64,7 @@ struct dm_verity_fec_io { unsigned nbufs; /* number of buffers allocated */ u8 *output; /* buffer for corrected output */ size_t output_pos; + unsigned level; /* recursion level */ }; #ifdef CONFIG_DM_VERITY_FEC From c4d8e3e8d2aa09f5826f517339baa7632f7873ea Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 Jun 2016 14:22:46 -0700 Subject: [PATCH 002/433] ANDROID: dm verity fec: add missing release from fec_ktype Add a release function to allow destroying the dm-verity device. Bug: 27928374 Signed-off-by: Sami Tolvanen Change-Id: Ic0f7c17e4889c5580d70b52d9a709a37165a5747 (cherry picked from commit 0039ccf47c8f99888f7b71b2a36a68a027fbe357) --- drivers/md/dm-verity-fec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index b26809a47ca37d..454535d23a7f4b 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -689,7 +689,8 @@ static struct attribute *fec_attrs[] = { static struct kobj_type fec_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = fec_attrs + .default_attrs = fec_attrs, + .release = dm_kobject_release }; /* From 8f9576b38193b8eb32d94d17f02baab436c3580a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 17 Jun 2016 11:22:03 -0700 Subject: [PATCH 003/433] ANDROID: dm verity fec: fix RS block calculation A call to do_div was changed in Linux 4.5 to div64_u64 in verity_fec_decode, which broke RS block calculation due to incompatible semantics. This change fixes the computation. Bug: 21893453 Change-Id: Idb88b901e0209c2cccc9c0796689f780592d58f9 Signed-off-by: Sami Tolvanen (cherry picked from commit 879aac93eebcc2862d71afa9eca3a0c0f51b3b01) --- drivers/md/dm-verity-fec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 454535d23a7f4b..a1e8571ce31450 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -463,9 +463,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, */ offset = block << v->data_dev_block_bits; - - res = offset; - div64_u64(res, v->fec->rounds << v->data_dev_block_bits); + res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits); /* * The base RS block we can feed to the interleaver to find out all From d053106b93363b77ef8ca60352069cbdc8fa0f87 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 17 Jun 2016 11:31:17 -0700 Subject: [PATCH 004/433] ANDROID: dm verity fec: initialize recursion level Explicitly initialize recursion level to zero at the beginning of each I/O operation. Bug: 28943429 Change-Id: I00c612be2b8c22dd5afb65a739551df91cb324fc Signed-off-by: Sami Tolvanen (cherry picked from commit 32ffb3a22d7fd269b2961323478ece92c06a8334) --- drivers/md/dm-verity-fec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index a1e8571ce31450..1dd667b975307e 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -532,6 +532,7 @@ void verity_fec_init_io(struct dm_verity_io *io) memset(fio->bufs, 0, sizeof(fio->bufs)); fio->nbufs = 0; fio->output = NULL; + fio->level = 0; } /* From 92ceaef1ac541eaad18a52cc536e081d783fe008 Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Thu, 28 Apr 2016 11:56:03 +0800 Subject: [PATCH 005/433] i2s: hi6210: Fix error handling in probe that can cause a panic On error devm_ioremap_resource returns an error pointer, not NULL, so correct the error handling to properly fail and not cause bad pointer traversals. Signed-off-by: Guodong Xu [jstultz: Reworked commit message] Signed-off-by: John Stultz --- sound/soc/hisilicon/hi6210-i2s.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/hisilicon/hi6210-i2s.c b/sound/soc/hisilicon/hi6210-i2s.c index e39cf7acf8cf53..8e44de6f938c3d 100644 --- a/sound/soc/hisilicon/hi6210-i2s.c +++ b/sound/soc/hisilicon/hi6210-i2s.c @@ -526,9 +526,9 @@ static int hi6210_i2s_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, i2s); i2s->base = devm_ioremap_resource(dev, res); - if (i2s->base == NULL) { + if (IS_ERR(i2s->base)) { dev_err(&pdev->dev, "ioremap failed\n"); - ret = -ENOMEM; + ret = PTR_ERR(i2s->base); goto err2; } @@ -537,10 +537,10 @@ static int hi6210_i2s_probe(struct platform_device *pdev) ret = -ENODEV; goto err2; } - i2s->base_syscon = devm_ioremap_resource(dev, res); - if (i2s->base_syscon == NULL) { + i2s->base_syscon = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(i2s->base_syscon)) { dev_err(&pdev->dev, "ioremap failed\n"); - ret = -ENOMEM; + ret = PTR_ERR(i2s->base_syscon); goto err2; } @@ -550,9 +550,9 @@ static int hi6210_i2s_probe(struct platform_device *pdev) goto err2; } i2s->base_pmctrl = devm_ioremap_resource(dev, res); - if (i2s->base_pmctrl == NULL) { + if (IS_ERR(i2s->base_pmctrl)) { dev_err(&pdev->dev, "ioremap failed\n"); - ret = -ENOMEM; + ret = PTR_ERR(i2s->base_pmctrl); goto err2; } From ab4f14dd052ba453558a8913d8d5547abbfc7b88 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 7 Mar 2016 11:31:10 +0100 Subject: [PATCH 006/433] UPSTREAM: usbnet: cleanup after bind() in probe() (cherry pick from commit 1666984c8625b3db19a9abc298931d35ab7bc64b) In case bind() works, but a later error forces bailing in probe() in error cases work and a timer may be scheduled. They must be killed. This fixes an error case related to the double free reported in http://www.spinics.net/lists/netdev/msg367669.html and needs to go on top of Linus' fix to cdc-ncm. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Bug: 28744625 --- drivers/net/usb/usbnet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 0744bf2ef2d6e4..c2ea4e5666fb96 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1766,6 +1766,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (info->unbind) info->unbind (dev, udev); out1: + /* subdrivers must undo all they did in bind() if they + * fail it, but we may fail later and a deferred kevent + * may trigger an error resubmitting itself and, worse, + * schedule a timer. So we kill it all just in case. + */ + cancel_work_sync(&dev->kevent); + del_timer_sync(&dev->delay); free_netdev(net); out: return status; From 872e24621a63b75b3b644f6369b04bfb092264b1 Mon Sep 17 00:00:00 2001 From: Thierry Strudel Date: Tue, 14 Jun 2016 17:46:44 -0700 Subject: [PATCH 007/433] ANDROID: cpu: send KOBJ_ONLINE event when enabling cpus In case some sysfs nodes needs to be labeled with a different label than sysfs then user needs to be notified when a core is brought back online. Signed-off-by: Thierry Strudel Bug: 29359497 Change-Id: I0395c86e01cd49c348fda8f93087d26f88557c91 --- kernel/cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 37731292f8a160..9ced7c7516481f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -627,6 +627,7 @@ void __weak arch_enable_nonboot_cpus_end(void) void enable_nonboot_cpus(void) { int cpu, error; + struct device *cpu_device; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); @@ -644,6 +645,12 @@ void enable_nonboot_cpus(void) trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); + cpu_device = get_cpu_device(cpu); + if (!cpu_device) + pr_err("%s: failed to get cpu%d device\n", + __func__, cpu); + else + kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error); From 6da57e2f0beee58a6570719a865f4e1b1f7822c2 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 23 Jun 2016 11:51:39 +0530 Subject: [PATCH 008/433] ANDROID: configs: remove unused configs Remove following configs which no longer exist: CONFIG_IP6_NF_TARGET_REJECT_SKERR CONFIG_IP_NF_TARGET_REJECT_SKERR CONFIG_RESOURCE_COUNTERS CONFIG_TABLET_USB_WACOM Signed-off-by: Amit Pundir --- android/configs/android-base.cfg | 3 --- android/configs/android-recommended.cfg | 1 - 2 files changed, 4 deletions(-) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 6db5542a51f4c0..bdd99f2becfc5b 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -37,7 +37,6 @@ CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y CONFIG_IP6_NF_TARGET_REJECT=y -CONFIG_IP6_NF_TARGET_REJECT_SKERR=y CONFIG_IPV6=y CONFIG_IPV6_MIP6=y CONFIG_IPV6_MULTIPLE_TABLES=y @@ -64,7 +63,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_REJECT_SKERR=y CONFIG_NET=y CONFIG_NETDEVICES=y CONFIG_NETFILTER=y @@ -140,7 +138,6 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_QUOTA=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y CONFIG_SECURITY=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg index 35936afdcae440..c3222a77ba24a3 100644 --- a/android/configs/android-recommended.cfg +++ b/android/configs/android-recommended.cfg @@ -110,7 +110,6 @@ CONFIG_TABLET_USB_AIPTEK=y CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y -CONFIG_TABLET_USB_WACOM=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_IO_ACCOUNTING=y From 0dec8c0d67c64401d97122e4eba347ccc5850622 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 13 May 2016 12:04:06 -0700 Subject: [PATCH 009/433] scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands commit a621bac3044ed6f7ec5fa0326491b2d4838bfa93 upstream. When SCSI was written, all commands coming from the filesystem (REQ_TYPE_FS commands) had data. This meant that our signal for needing to complete the command was the number of bytes completed being equal to the number of bytes in the request. Unfortunately, with the advent of flush barriers, we can now get zero length REQ_TYPE_FS commands, which confuse this logic because they satisfy the condition every time. This means they never get retried even for retryable conditions, like UNIT ATTENTION because we complete them early assuming they're done. Fix this by special casing the early completion condition to recognise zero length commands with errors and let them drop through to the retry code. Reported-by: Sebastian Parschauer Signed-off-by: James E.J. Bottomley Tested-by: Jack Wang Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dd8ad2a44510ca..cf5b99e1f12b08 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -910,9 +910,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) } /* - * If we finished all bytes in the request we are done now. + * special case: failed zero length commands always need to + * drop down into the retry code. Otherwise, if we finished + * all bytes in the request we are done now. */ - if (!scsi_end_request(req, error, good_bytes, 0)) + if (!(blk_rq_bytes(req) == 0 && error) && + !scsi_end_request(req, error, good_bytes, 0)) return; /* From c54c115da7214a41a697180964cf6d7a5a50b599 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 31 May 2016 09:42:29 -0400 Subject: [PATCH 010/433] scsi: Add QEMU CD-ROM to VPD Inquiry Blacklist commit fbd83006e3e536fcb103228d2422ea63129ccb03 upstream. Linux fails to boot as a guest with a QEMU CD-ROM: [ 4.439488] ata2.00: ATAPI: QEMU CD-ROM, 0.8.2, max UDMA/100 [ 4.443649] ata2.00: configured for MWDMA2 [ 4.450267] scsi 1:0:0:0: CD-ROM QEMU QEMU CD-ROM 0.8. PQ: 0 ANSI: 5 [ 4.464317] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen [ 4.464319] ata2.00: BMDMA stat 0x5 [ 4.464339] ata2.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 0 dma 16640 in [ 4.464339] Inquiry 12 01 00 00 ff 00res 48/20:02:00:24:00/00:00:00:00:00/a0 Emask 0x2 (HSM violation) [ 4.464341] ata2.00: status: { DRDY DRQ } [ 4.465864] ata2: soft resetting link [ 4.625971] ata2.00: configured for MWDMA2 [ 4.628290] ata2: EH complete [ 4.646670] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen [ 4.646671] ata2.00: BMDMA stat 0x5 [ 4.646683] ata2.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 0 dma 16640 in [ 4.646683] Inquiry 12 01 00 00 ff 00res 48/20:02:00:24:00/00:00:00:00:00/a0 Emask 0x2 (HSM violation) [ 4.646685] ata2.00: status: { DRDY DRQ } [ 4.648193] ata2: soft resetting link ... Fix this by suppressing VPD inquiry for this device. Signed-off-by: Ewan D. Milne Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index da2e068ee47d15..93cbefa75b26db 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -227,6 +227,7 @@ static struct { {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, + {"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES}, {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"QUANTUM", "XP34301", "1071", BLIST_NOTQ}, From 23cdd8c3cbe9d790f23d7f9ae14e9b828f56f69c Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 16 May 2016 11:14:54 +0200 Subject: [PATCH 011/433] tipc: check nl sock before parsing nested attributes [ Upstream commit 45e093ae2830cd1264677d47ff9a95a71f5d9f9c ] Make sure the socket for which the user is listing publication exists before parsing the socket netlink attributes. Prior to this patch a call without any socket caused a NULL pointer dereference in tipc_nl_publ_dump(). Tested-and-reported-by: Baozeng Ding Signed-off-by: Richard Alpe Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e53003cf7703af..9b713e0ce00dbb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2814,6 +2814,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], tipc_nl_sock_policy); From 49543942beb1b9ca95709d6cfa67708932aa4d11 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 May 2016 17:28:16 +0800 Subject: [PATCH 012/433] netlink: Fix dump skb leak/double free [ Upstream commit 92964c79b357efd980812c4de5c1fd2ec8bb5520 ] When we free cb->skb after a dump, we do it after releasing the lock. This means that a new dump could have started in the time being and we'll end up freeing their skb instead of ours. This patch saves the skb and module before we unlock so we free the right memory. Fixes: 16b304f3404f ("netlink: Eliminate kmalloc in netlink dump operation.") Reported-by: Baozeng Ding Signed-off-by: Herbert Xu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 992b35fb8615ca..7a5fa0c9837764 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2784,6 +2784,7 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; + struct module *module; int len, err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2863,9 +2864,11 @@ static int netlink_dump(struct sock *sk) cb->done(cb); nlk->cb_running = false; + module = cb->module; + skb = cb->skb; mutex_unlock(nlk->cb_mutex); - module_put(cb->module); - consume_skb(cb->skb); + module_put(module); + consume_skb(skb); return 0; errout_skb: From 6a58f3e12e8e5c330940b38cc8c98d52171aefa8 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Tue, 17 May 2016 16:57:37 +0200 Subject: [PATCH 013/433] tipc: fix nametable publication field in nl compat [ Upstream commit 03aaaa9b941e136757b55c4cf775aab6068dfd94 ] The publication field of the old netlink API should contain the publication key and not the publication reference. Fixes: 44a8ae94fd55 (tipc: convert legacy nl name table dump to nl compat) Signed-off-by: Richard Alpe Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95e113294..2ed732bfe94b4d 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -802,7 +802,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, goto out; tipc_tlv_sprintf(msg->rep, "%-10u %s", - nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); out: tipc_tlv_sprintf(msg->rep, "\n"); From 835d0122a57ffaea43162d0e7a4f5bf25d2af78b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 17 May 2016 18:58:08 +0200 Subject: [PATCH 014/433] switchdev: pass pointer to fib_info instead of copy [ Upstream commit da4ed55165d41b1073f9a476f1c18493e9bf8c8e ] The problem is that fib_info->nh is [0] so the struct fib_info allocation size depends on number of nexthops. If we just copy fib_info, we do not copy the nexthops info and driver accesses memory which is not ours. Given the fact that fib4 does not defer operations and therefore it does not need copy, just pass the pointer down to drivers as it was done before. Fixes: 850d0cbc91 ("switchdev: remove pointers from switchdev objects") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/rocker/rocker.c | 4 ++-- include/net/switchdev.h | 2 +- net/switchdev/switchdev.c | 6 ++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2b34622a4bfe89..3920c3eb60061c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4475,7 +4475,7 @@ static int rocker_port_obj_add(struct net_device *dev, fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); err = rocker_port_fib_ipv4(rocker_port, trans, htonl(fib4->dst), fib4->dst_len, - &fib4->fi, fib4->tb_id, 0); + fib4->fi, fib4->tb_id, 0); break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_port_fdb_add(rocker_port, trans, @@ -4547,7 +4547,7 @@ static int rocker_port_obj_del(struct net_device *dev, fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); err = rocker_port_fib_ipv4(rocker_port, NULL, htonl(fib4->dst), fib4->dst_len, - &fib4->fi, fib4->tb_id, + fib4->fi, fib4->tb_id, ROCKER_OP_FLAG_REMOVE); break; case SWITCHDEV_OBJ_ID_PORT_FDB: diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1d22ce9f352e2e..31d0e51438486e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -88,7 +88,7 @@ struct switchdev_obj_ipv4_fib { struct switchdev_obj obj; u32 dst; int dst_len; - struct fib_info fi; + struct fib_info *fi; u8 tos; u8 type; u32 nlflags; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d5d7132ac84730..1b58866175e625 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1169,6 +1169,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = nlflags, @@ -1177,8 +1178,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - /* Don't offload route if using custom ip rules or if * IPv4 FIB offloading has been disabled completely. */ @@ -1222,6 +1221,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, + .fi = fi, .tos = tos, .type = type, .nlflags = 0, @@ -1230,8 +1230,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; - memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; From bccd56fad0433ef1ed0661bb3a4055503cdec4e9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 May 2016 13:36:51 +0800 Subject: [PATCH 015/433] tuntap: correctly wake up process during uninit [ Upstream commit addf8fc4acb1cf79492ac64966f07178793cb3d7 ] We used to check dev->reg_state against NETREG_REGISTERED after each time we are woke up. But after commit 9e641bdcfa4e ("net-tun: restructure tun_do_read for better sleep/wakeup efficiency"), it uses skb_recv_datagram() which does not check dev->reg_state. This will result if we delete a tun/tap device after a process is blocked in the reading. The device will wait for the reference count which was held by that process for ever. Fixes this by using RCV_SHUTDOWN which will be checked during sk_recv_datagram() before trying to wake up the process during uninit. Fixes: 9e641bdcfa4e ("net-tun: restructure tun_do_read for better sleep/wakeup efficiency") Cc: Eric Dumazet Cc: Xi Wang Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Eric Dumazet Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2d186bd66d4314..111b972e3053ea 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -567,11 +567,13 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); + tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); --tun->numqueues; } list_for_each_entry(tfile, &tun->disabled, next) { + tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); } @@ -627,6 +629,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte goto out; } tfile->queue_index = tun->numqueues; + tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; @@ -1408,9 +1411,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, if (!iov_iter_count(to)) return 0; - if (tun->dev->reg_state != NETREG_REGISTERED) - return -EIO; - /* Read frames from queue */ skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, &peeked, &off, &err); From 5b7ea922e1754107f77d146011612f2e42600cc1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 May 2016 17:22:48 -0500 Subject: [PATCH 016/433] bpf: Use mount_nodev not mount_ns to mount the bpf filesystem [ Upstream commit e27f4a942a0ee4b84567a3c6cfa84f273e55cbb7 ] While reviewing the filesystems that set FS_USERNS_MOUNT I spotted the bpf filesystem. Looking at the code I saw a broken usage of mount_ns with current->nsproxy->mnt_ns. As the code does not acquire a reference to the mount namespace it can not possibly be correct to store the mount namespace on the superblock as it does. Replace mount_ns with mount_nodev so that each mount of the bpf filesystem returns a distinct instance, and the code is not buggy. In discussion with Hannes Frederic Sowa it was reported that the use of mount_ns was an attempt to have one bpf instance per mount namespace, in an attempt to keep resources that pin resources from hiding. That intent simply does not work, the vfs is not built to allow that kind of behavior. Which means that the bpf filesystem really is buggy both semantically and in it's implemenation as it does not nor can it implement the original intent. This change is userspace visible, but my experience with similar filesystems leads me to believe nothing will break with a model of each mount of the bpf filesystem is distinct from all others. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Cc: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: "Eric W. Biederman" Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index d1a7646f79c5a1..00f978e9cf912c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -358,7 +358,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *bpf_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { - return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); + return mount_nodev(type, flags, data, bpf_fill_super); } static struct file_system_type bpf_fs_type = { From ab1f253ddccc235520fa4f70d32a8dd6bf8ef346 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 19 May 2016 15:58:33 +0200 Subject: [PATCH 017/433] udp: prevent skbs lingering in tunnel socket queues [ Upstream commit e5aed006be918af163eb397e45aa5ea6cefd5e01 ] In case we find a socket with encapsulation enabled we should call the encap_recv function even if just a udp header without payload is available. The callbacks are responsible for correctly verifying and dropping the packets. Also, in case the header validation fails for geneve and vxlan we shouldn't put the skb back into the socket queue, no one will pick them up there. Instead we can simply discard them in the respective encap_recv functions. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 10 +++------- drivers/net/vxlan.c | 10 ++-------- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58efdec12f300d..e3f939fef6968a 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -310,15 +310,15 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* Need Geneve and inner Ethernet header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) - goto error; + goto drop; /* Return packets with reserved bits set */ geneveh = geneve_hdr(skb); if (unlikely(geneveh->ver != GENEVE_VER)) - goto error; + goto drop; if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) - goto error; + goto drop; opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, @@ -336,10 +336,6 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* Consume bad packet */ kfree_skb(skb); return 0; - -error: - /* Let the UDP layer deal with the skb */ - return 1; } static struct socket *geneve_create_sock(struct net *net, bool ipv6, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3c0df70e2f53aa..0496f1a8a57d75 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1254,7 +1254,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) - goto error; + goto drop; vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); flags = ntohl(vxh->vx_flags); @@ -1344,13 +1344,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) bad_flags: netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); - -error: - if (tun_dst) - dst_release((struct dst_entry *)tun_dst); - - /* Return non vxlan pkt */ - return 1; + goto drop; } static int arp_reduce(struct net_device *dev, struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 21fbb54f11d0db..44e1632370dd26 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1531,7 +1531,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6665e1a0bfe1e5..275af43306f929 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -647,7 +647,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ From f5f16bf66d7e07e5a04f07226caefeaf3136c83a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 19 May 2016 17:26:29 +0200 Subject: [PATCH 018/433] uapi glibc compat: fix compilation when !__USE_MISC in glibc [ Upstream commit f0a3fdca794d1e68ae284ef4caefe681f7c18e89 ] These structures are defined only if __USE_MISC is set in glibc net/if.h headers, ie when _BSD_SOURCE or _SVID_SOURCE are defined. CC: Jan Engelhardt CC: Josh Boyer CC: Stephen Hemminger CC: Waldemar Brodkorb CC: Gabriel Laskar CC: Mikko Rapeli Fixes: 4a91cb61bb99 ("uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/libc-compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index d5e38c73377c05..e4f048ee7043d1 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -52,7 +52,7 @@ #if defined(__GLIBC__) /* Coordinate with glibc net/if.h header. */ -#if defined(_NET_IF_H) +#if defined(_NET_IF_H) && defined(__USE_MISC) /* GLIBC headers included first so don't define anything * that would already be defined. */ From bfe951d547bf15bf1192abd20773e6603dacadf1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 22 May 2016 23:16:18 +0200 Subject: [PATCH 019/433] bpf, inode: disallow userns mounts [ Upstream commit 612bacad78ba6d0a91166fc4487af114bac172a8 ] Follow-up to commit e27f4a942a0e ("bpf: Use mount_nodev not mount_ns to mount the bpf filesystem"), which removes the FS_USERNS_MOUNT flag. The original idea was to have a per mountns instance instead of a single global fs instance, but that didn't work out and we had to switch to mount_nodev() model. The intent of that middle ground was that we avoid users who don't play nice to create endless instances of bpf fs which are difficult to control and discover from an admin point of view, but at the same time it would have allowed us to be more flexible with regard to namespaces. Therefore, since we now did the switch to mount_nodev() as a fix where individual instances are created, we also need to remove userns mount flag along with it to avoid running into mentioned situation. I don't expect any breakage at this early point in time with removing the flag and we can revisit this later should the requirement for this come up with future users. This and commit e27f4a942a0e have been split to facilitate tracking should any of them run into the unlikely case of causing a regression. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 00f978e9cf912c..cb85d228b1acd6 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -366,7 +366,6 @@ static struct file_system_type bpf_fs_type = { .name = "bpf", .mount = bpf_mount, .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, }; MODULE_ALIAS_FS("bpf"); From 450db517b9223908ab5c1a9b5ddce3b6c4d75b18 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 24 May 2016 18:53:36 +0100 Subject: [PATCH 020/433] sfc: on MC reset, clear PIO buffer linkage in TXQs [ Upstream commit c0795bf64cba4d1b796fdc5b74b33772841ed1bb ] Otherwise, if we fail to allocate new PIO buffers, our TXQs will try to use the old ones, which aren't there any more. Fixes: 183233bec810 "sfc: Allocate and link PIO buffers; map them with write-combining" Signed-off-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ef10.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e6a084a6be12d0..cbe9a330117a11 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -619,6 +619,17 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) return rc; } +static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + + /* All our existing PIO buffers went away */ + efx_for_each_channel(channel, efx) + efx_for_each_channel_tx_queue(tx_queue, channel) + tx_queue->piobuf = NULL; +} + #else /* !EFX_USE_PIO */ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) @@ -635,6 +646,10 @@ static void efx_ef10_free_piobufs(struct efx_nic *efx) { } +static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) +{ +} + #endif /* EFX_USE_PIO */ static void efx_ef10_remove(struct efx_nic *efx) @@ -1018,6 +1033,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) nic_data->must_realloc_vis = true; nic_data->must_restore_filters = true; nic_data->must_restore_piobufs = true; + efx_ef10_forget_old_piobufs(efx); nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ From 13a055d6ca34dcce324ccfab63cc05db726030bc Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 25 May 2016 21:21:52 +0200 Subject: [PATCH 021/433] team: don't call netdev_change_features under team->lock [ Upstream commit f6988cb63a4e698d8a62a1d085d263d1fcc351ea ] The team_device_event() notifier calls team_compute_features() to fix vlan_features under team->lock to protect team->port_list. The problem is that subsequent __team_compute_features() calls netdev_change_features() to propagate vlan_features to upper vlan devices while team->lock is still taken. This can lead to deadlock when NETIF_F_LRO is modified on lower devices or team device itself. Example: The team0 as active backup with eth0 and eth1 NICs. Both eth0 & eth1 are LRO capable and LRO is enabled. Thus LRO is also enabled on team0. The command 'ethtool -K team0 lro off' now hangs due to this deadlock: dev_ethtool() -> ethtool_set_features() -> __netdev_update_features(team) -> netdev_sync_lower_features() -> netdev_update_features(lower_1) -> __netdev_update_features(lower_1) -> netdev_features_change(lower_1) -> call_netdevice_notifiers(...) -> team_device_event(lower_1) -> team_compute_features(team) [TAKES team->lock] -> netdev_change_features(team) -> __netdev_update_features(team) -> netdev_sync_lower_features() -> netdev_update_features(lower_2) -> __netdev_update_features(lower_2) -> netdev_features_change(lower_2) -> call_netdevice_notifiers(...) -> team_device_event(lower_2) -> team_compute_features(team) [DEADLOCK] The bug is present in team from the beginning but it appeared after the commit fd867d5 (net/core: generic support for disabling netdev features down stack) that adds synchronization of features with lower devices. Fixes: fd867d5 (net/core: generic support for disabling netdev features down stack) Cc: Jiri Pirko Signed-off-by: Ivan Vecera Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 59fefca7426386..a5f392ae30d544 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -969,7 +969,7 @@ static void team_port_disable(struct team *team, NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) -static void __team_compute_features(struct team *team) +static void ___team_compute_features(struct team *team) { struct team_port *port; u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL; @@ -993,15 +993,20 @@ static void __team_compute_features(struct team *team) team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) team->dev->priv_flags |= IFF_XMIT_DST_RELEASE; +} +static void __team_compute_features(struct team *team) +{ + ___team_compute_features(team); netdev_change_features(team->dev); } static void team_compute_features(struct team *team) { mutex_lock(&team->lock); - __team_compute_features(team); + ___team_compute_features(team); mutex_unlock(&team->lock); + netdev_change_features(team->dev); } static int team_port_enter(struct team *team, struct team_port *port) From 721976e93e5d8963d0c937ee236489968bfcfb81 Mon Sep 17 00:00:00 2001 From: Chen Haiquan Date: Fri, 27 May 2016 10:49:11 +0800 Subject: [PATCH 022/433] vxlan: Accept user specified MTU value when create new vxlan link [ Upstream commit ce577668a426c6a9e2470a09dcd07fbd6e45272a ] When create a new vxlan link, example: ip link add vtap mtu 1440 type vxlan vni 1 dev eth0 The argument "mtu" has no effect, because it is not set to conf->mtu. The default value is used in vxlan_dev_configure function. This problem was introduced by commit 0dfbdf4102b9 (vxlan: Factor out device configuration). Fixes: 0dfbdf4102b9 (vxlan: Factor out device configuration) Signed-off-by: Chen Haiquan Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0496f1a8a57d75..0b24f51718bef0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2985,6 +2985,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; + if (tb[IFLA_MTU]) + conf.mtu = nla_get_u32(tb[IFLA_MTU]); + err = vxlan_dev_configure(src_net, dev, &conf); switch (err) { case -ENODEV: From f946ceab11c4bede9b3d6c0d56f0964d65bfff65 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: [PATCH 023/433] tcp: record TLP and ER timer stats in v6 stats [ Upstream commit ce3cf4ec0305919fc69a972f6c2b2efd35d36abc ] The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b8d405623f4f07..1a1cd3938fd0c3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1706,7 +1706,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { From 38f56354f4e1cfbaa1f2f10e9acb30f105b70aed Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 7 Jun 2016 19:14:17 +0900 Subject: [PATCH 024/433] bridge: Don't insert unnecessary local fdb entry on changing mac address [ Upstream commit 0b148def403153a4d1565f1640356cb78ce5109f ] The missing br_vlan_should_use() test caused creation of an unneeded local fdb entry on changing mac address of a bridge device when there is a vlan which is configured on a bridge port but not on the bridge device. Fixes: 2594e9064a57 ("bridge: vlan: add per-vlan struct and move to rhashtables") Signed-off-by: Toshiaki Makita Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb829d09cf..09442e0f7f67f2 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -278,6 +278,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); From 05cbd46be7f1aaa355301e2e12c378cbfdeeeb2a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Jun 2016 12:59:17 +0200 Subject: [PATCH 025/433] l2tp: fix configuration passed to setup_udp_tunnel_sock() [ Upstream commit a5c5e2da8551eb69e5d5d09d51d526140b5db9fb ] Unused fields of udp_cfg must be all zeros. Otherwise setup_udp_tunnel_sock() fills ->gro_receive and ->gro_complete callbacks with garbage, eventually resulting in panic when used by udp_gro_receive(). [ 72.694123] BUG: unable to handle kernel paging request at ffff880033f87d78 [ 72.695518] IP: [] 0xffff880033f87d78 [ 72.696530] PGD 26e2067 PUD 26e3067 PMD 342ed063 PTE 8000000033f87163 [ 72.696530] Oops: 0011 [#1] SMP KASAN [ 72.696530] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pptp gre pppox ppp_generic slhc crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel evdev aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper serio_raw acpi_cpufreq button proc\ essor ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci virtio_ring virtio [ 72.696530] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.7.0-rc1 #1 [ 72.696530] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 72.696530] task: ffff880035b59700 ti: ffff880035b70000 task.ti: ffff880035b70000 [ 72.696530] RIP: 0010:[] [] 0xffff880033f87d78 [ 72.696530] RSP: 0018:ffff880035f87bc0 EFLAGS: 00010246 [ 72.696530] RAX: ffffed000698f996 RBX: ffff88003326b840 RCX: ffffffff814cc823 [ 72.696530] RDX: ffff88003326b840 RSI: ffff880033e48038 RDI: ffff880034c7c780 [ 72.696530] RBP: ffff880035f87c18 R08: 000000000000a506 R09: 0000000000000000 [ 72.696530] R10: ffff880035f87b38 R11: ffff880034b9344d R12: 00000000ebfea715 [ 72.696530] R13: 0000000000000000 R14: ffff880034c7c780 R15: 0000000000000000 [ 72.696530] FS: 0000000000000000(0000) GS:ffff880035f80000(0000) knlGS:0000000000000000 [ 72.696530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.696530] CR2: ffff880033f87d78 CR3: 0000000033c98000 CR4: 00000000000406a0 [ 72.696530] Stack: [ 72.696530] ffffffff814cc834 ffff880034b93468 0000001481416818 ffff88003326b874 [ 72.696530] ffff880034c7ccb0 ffff880033e48038 ffff88003326b840 ffff880034b93462 [ 72.696530] ffff88003326b88a ffff88003326b88c ffff880034b93468 ffff880035f87c70 [ 72.696530] Call Trace: [ 72.696530] [ 72.696530] [] ? udp_gro_receive+0x1c6/0x1f9 [ 72.696530] [] udp4_gro_receive+0x2b5/0x310 [ 72.696530] [] inet_gro_receive+0x4a3/0x4cd [ 72.696530] [] dev_gro_receive+0x584/0x7a3 [ 72.696530] [] ? __lock_is_held+0x29/0x64 [ 72.696530] [] napi_gro_receive+0x124/0x21d [ 72.696530] [] virtnet_receive+0x8df/0x8f6 [virtio_net] [ 72.696530] [] virtnet_poll+0x1d/0x8d [virtio_net] [ 72.696530] [] net_rx_action+0x15b/0x3b9 [ 72.696530] [] __do_softirq+0x216/0x546 [ 72.696530] [] irq_exit+0x49/0xb6 [ 72.696530] [] do_IRQ+0xe2/0xfa [ 72.696530] [] common_interrupt+0x89/0x89 [ 72.696530] [ 72.696530] [] ? trace_hardirqs_on_caller+0x229/0x270 [ 72.696530] [] ? default_idle+0x1c/0x2d [ 72.696530] [] ? default_idle+0x1a/0x2d [ 72.696530] [] arch_cpu_idle+0xa/0xc [ 72.696530] [] default_idle_call+0x1a/0x1c [ 72.696530] [] cpu_startup_entry+0x15b/0x20f [ 72.696530] [] start_secondary+0x12c/0x133 [ 72.696530] Code: ff ff ff ff ff ff ff ff ff ff 7f ff ff ff ff ff ff ff 7f 00 7e f8 33 00 88 ff ff 6d 61 58 81 ff ff ff ff 5e de 0a 81 ff ff ff ff <00> 5c e2 34 00 88 ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 72.696530] RIP [] 0xffff880033f87d78 [ 72.696530] RSP [ 72.696530] CR2: ffff880033f87d78 [ 72.696530] ---[ end trace ad7758b9a1dccf99 ]--- [ 72.696530] Kernel panic - not syncing: Fatal exception in interrupt [ 72.696530] Kernel Offset: disabled [ 72.696530] ---[ end Kernel panic - not syncing: Fatal exception in interrupt v2: use empty initialiser instead of "{ NULL }" to avoid relying on first field's type. Fixes: 38fd2af24fcf ("udp: Add socket based GRO and config") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb4dfa777..ec17cbe8a02bb7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; From 4d82f395bb0597a3aba23387406e2c07332d6ae9 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 8 Jun 2016 15:13:34 +0200 Subject: [PATCH 026/433] ipv6: Skip XFRM lookup if dst_entry in socket cache is valid [ Upstream commit 00bc0ef5880dc7b82f9c320dead4afaad48e47be ] At present we perform an xfrm_lookup() for each UDPv6 message we send. The lookup involves querying the flow cache (flow_cache_lookup) and, in case of a cache miss, creating an XFRM bundle. If we miss the flow cache, we can end up creating a new bundle and deriving the path MTU (xfrm_init_pmtu) from on an already transformed dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down to xfrm_lookup(). This can happen only if we're caching the dst_entry in the socket, that is when we're using a connected UDP socket. To put it another way, the path MTU shrinks each time we miss the flow cache, which later on leads to incorrectly fragmented payload. It can be observed with ESPv6 in transport mode: 1) Set up a transformation and lower the MTU to trigger fragmentation # ip xfrm policy add dir out src ::1 dst ::1 \ tmpl src ::1 dst ::1 proto esp spi 1 # ip xfrm state add src ::1 dst ::1 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b # ip link set dev lo mtu 1500 2) Monitor the packet flow and set up an UDP sink # tcpdump -ni lo -ttt & # socat udp6-listen:12345,fork /dev/null & 3) Send a datagram that needs fragmentation with a connected socket # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345 2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error 00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448 00:00:00.000014 IP6 ::1 > ::1: frag (1448|32) 00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272 (^ ICMPv6 Parameter Problem) 00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136 4) Compare it to a non-connected socket # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345 00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448 00:00:00.000010 IP6 ::1 > ::1: frag (1448|64) What happens in step (3) is: 1) when connecting the socket in __ip6_datagram_connect(), we perform an XFRM lookup, miss the flow cache, create an XFRM bundle, and cache the destination, 2) afterwards, when sending the datagram, we perform an XFRM lookup, again, miss the flow cache (due to mismatch of flowi6_iif and flowi6_oif, which is an issue of its own), and recreate an XFRM bundle based on the cached (and already transformed) destination. To prevent the recreation of an XFRM bundle, avoid an XFRM lookup altogether whenever we already have a destination entry cached in the socket. This prevents the path MTU shrinkage and brings us on par with UDPv4. The fix also benefits connected PINGv6 sockets, another user of ip6_sk_dst_lookup_flow(), who also suffer messages being transformed twice. Joint work with Hannes Frederic Sowa. Reported-by: Jan Tluka Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a175152d3e46e8..58900c21e4e425 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1072,17 +1072,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); From 3dc443059897b8a2fa3e3b18f794ee31c0063730 Mon Sep 17 00:00:00 2001 From: David Wragg Date: Fri, 3 Jun 2016 18:58:13 -0400 Subject: [PATCH 027/433] vxlan: Relax MTU constraints [ Upstream commit 72564b59ffc438ea103b0727a921aaddce766728 ] Allow the MTU of vxlan devices without an underlying device to be set to larger values (up to a maximum based on IP packet limits and vxlan overhead). Previously, their MTUs could not be set to higher than the conventional ethernet value of 1500. This is a very arbitrary value in the context of vxlan, and prevented vxlan devices from being able to take advantage of jumbo frames etc. The default MTU remains 1500, for compatibility. Signed-off-by: David Wragg Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0b24f51718bef0..2f44bc5b20a41d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2364,29 +2364,43 @@ static void vxlan_set_multicast_list(struct net_device *dev) { } -static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +static int __vxlan_change_mtu(struct net_device *dev, + struct net_device *lowerdev, + struct vxlan_rdst *dst, int new_mtu, bool strict) { - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; - struct net_device *lowerdev; - int max_mtu; + int max_mtu = IP_MAX_MTU; - lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); - if (lowerdev == NULL) - return eth_change_mtu(dev, new_mtu); + if (lowerdev) + max_mtu = lowerdev->mtu; if (dst->remote_ip.sa.sa_family == AF_INET6) - max_mtu = lowerdev->mtu - VXLAN6_HEADROOM; + max_mtu -= VXLAN6_HEADROOM; else - max_mtu = lowerdev->mtu - VXLAN_HEADROOM; + max_mtu -= VXLAN_HEADROOM; - if (new_mtu < 68 || new_mtu > max_mtu) + if (new_mtu < 68) return -EINVAL; + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); +} + static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, struct ip_tunnel_info *info, __be16 sport, __be16 dport) From 51d7c394605bd5d72e76745def0002dd938ec48b Mon Sep 17 00:00:00 2001 From: David Wragg Date: Fri, 3 Jun 2016 18:58:14 -0400 Subject: [PATCH 028/433] geneve: Relax MTU constraints [ Upstream commit 55e5bfb53cff286c1c1ff49f51325dc15c7fea63 ] Allow the MTU of geneve devices to be set to large values, in order to exploit underlying networks with larger frame sizes. GENEVE does not have a fixed encapsulation overhead (an openvswitch rule can add variable length options), so there is no relevant maximum MTU to enforce. A maximum of IP_MAX_MTU is used instead. Encapsulated packets that are too big for the underlying network will get dropped on the floor. Signed-off-by: David Wragg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index e3f939fef6968a..240620c5888a18 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -994,6 +994,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) return geneve_xmit_skb(skb, dev, info); } +static int geneve_change_mtu(struct net_device *dev, int new_mtu) +{ + /* GENEVE overhead is not fixed, so we can't enforce a more + * precise max MTU. + */ + if (new_mtu < 68 || new_mtu > IP_MAX_MTU) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); @@ -1038,7 +1049,7 @@ static const struct net_device_ops geneve_netdev_ops = { .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, .ndo_get_stats64 = ip_tunnel_get_stats64, - .ndo_change_mtu = eth_change_mtu, + .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fill_metadata_dst = geneve_fill_metadata_dst, From ce9c0dba5bf3ad4a25a9dc202e36e74d904df61d Mon Sep 17 00:00:00 2001 From: David Wragg Date: Fri, 3 Jun 2016 18:58:15 -0400 Subject: [PATCH 029/433] vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices [ Upstream commit 7e059158d57b79159eaf1f504825d19866ef2c42 ] Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could transmit vxlan packets of any size, constrained only by the ability to send out the resulting packets. 4.3 introduced netdevs corresponding to tunnel vports. These netdevs have an MTU, which limits the size of a packet that can be successfully encapsulated. The default MTU values are low (1500 or less), which is awkwardly small in the context of physical networks supporting jumbo frames, and leads to a conspicuous change in behaviour for userspace. Instead, set the MTU on openvswitch-created netdevs to be the relevant maximum (i.e. the maximum IP packet size minus any relevant overhead), effectively restoring the behaviour prior to 4.3. Signed-off-by: David Wragg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 18 ++++++++++++++---- drivers/net/vxlan.c | 11 ++++++++--- include/net/ip_tunnels.h | 1 + net/ipv4/ip_gre.c | 8 ++++++++ net/ipv4/ip_tunnel.c | 20 +++++++++++++++++--- net/openvswitch/vport-vxlan.c | 2 ++ 6 files changed, 50 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 240620c5888a18..69e31e2a68fcc5 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1356,11 +1356,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, htons(dst_port), true); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } + if (err) + goto err; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = geneve_change_mtu(dev, IP_MAX_MTU); + if (err) + goto err; + return dev; + + err: + free_netdev(dev); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2f44bc5b20a41d..00378090162876 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2776,6 +2776,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, int err; bool use_ipv6 = false; __be16 default_port = vxlan->cfg.dst_port; + struct net_device *lowerdev = NULL; vxlan->net = src_net; @@ -2796,9 +2797,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, } if (conf->remote_ifindex) { - struct net_device *lowerdev - = __dev_get_by_index(src_net, conf->remote_ifindex); - + lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); dst->remote_ifindex = conf->remote_ifindex; if (!lowerdev) { @@ -2822,6 +2821,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, needed_headroom = lowerdev->hard_header_len; } + if (conf->mtu) { + err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); + if (err) + return err; + } + if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) needed_headroom += VXLAN6_HEADROOM; else diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 62a750a6a8f8cb..af40bc586a1b6a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, u8 *protocol, struct flowi4 *fl4); +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7dc962b89fa166..3e4184088082f2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1247,6 +1247,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ce30c8b72457fd..3310ac75e3f394 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -948,17 +948,31 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index d933cb89efac18..5eb7694348b5b8 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) struct vxlan_config conf = { .no_share = true, .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, + /* Don't restrict the packets that can be sent by MTU */ + .mtu = IP_MAX_MTU, }; if (!options) { From ded4fc623b3c331b847d23f947f457a020f25683 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:23 +0200 Subject: [PATCH 030/433] KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d14bdb553f9196169f003058ae1cdabe514470e6 upstream. MOV to DR6 or DR7 causes a #GP if an attempt is made to write a 1 to any of bits 63:32. However, this is not detected at KVM_SET_DEBUGREGS time, and the next KVM_RUN oopses: general protection fault: 0000 [#1] SMP CPU: 2 PID: 14987 Comm: a.out Not tainted 4.4.9-300.fc23.x86_64 #1 Hardware name: LENOVO 2325F51/2325F51, BIOS G2ET32WW (1.12 ) 05/30/2012 [...] Call Trace: [] kvm_arch_vcpu_ioctl_run+0x141d/0x14e0 [kvm] [] kvm_vcpu_ioctl+0x33d/0x620 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Code: 55 83 ff 07 48 89 e5 77 27 89 ff ff 24 fd 90 87 80 81 0f 23 fe 5d c3 0f 23 c6 5d c3 0f 23 ce 5d c3 0f 23 d6 5d c3 0f 23 de 5d c3 <0f> 23 f6 5d c3 0f 0b 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [] native_set_debugreg+0x2b/0x40 RSP Testcase (beautified/reduced from syzkaller output): #include #include #include #include #include #include #include long r[8]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); memcpy(&dr, "\x5d\x6a\x6b\xe8\x57\x3b\x4b\x7e\xcf\x0d\xa1\x72" "\xa3\x4a\x29\x0c\xfc\x6d\x44\x00\xa7\x52\xc7\xd8" "\x00\xdb\x89\x9d\x78\xb5\x54\x6b\x6b\x13\x1c\xe9" "\x5e\xd3\x0e\x40\x6f\xb4\x66\xf7\x5b\xe3\x36\xcb", 48); r[7] = ioctl(r[4], KVM_SET_DEBUGREGS, &dr); r[6] = ioctl(r[4], KVM_RUN, 0); } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 605cea75eb0d18..be222666b1c255 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3014,6 +3014,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, if (dbgregs->flags) return -EINVAL; + if (dbgregs->dr6 & ~0xffffffffull) + return -EINVAL; + if (dbgregs->dr7 & ~0xffffffffull) + return -EINVAL; + memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; From 2cb77b0ad44351869427d6a744bf5791e6a2c100 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:21 +0200 Subject: [PATCH 031/433] KVM: irqfd: fix NULL pointer dereference in kvm_irq_map_gsi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c622a3c21ede892e370b56e1ceb9eb28f8bbda6b upstream. Found by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 0000000000000120 IP: [] kvm_irq_map_gsi+0x12/0x90 [kvm] PGD 6f80b067 PUD b6535067 PMD 0 Oops: 0000 [#1] SMP CPU: 3 PID: 4988 Comm: a.out Not tainted 4.4.9-300.fc23.x86_64 #1 [...] Call Trace: [] irqfd_update+0x32/0xc0 [kvm] [] kvm_irqfd+0x3dc/0x5b0 [kvm] [] kvm_vm_ioctl+0x164/0x6f0 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] tracesys_phase2+0x84/0x89 Code: b5 71 a7 e0 5b 41 5c 41 5d 5d f3 c3 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 8b 8f 10 2e 00 00 31 c0 48 89 e5 <39> 91 20 01 00 00 76 6a 48 63 d2 48 8b 94 d1 28 01 00 00 48 85 RIP [] kvm_irq_map_gsi+0x12/0x90 [kvm] RSP CR2: 0000000000000120 Testcase: #include #include #include #include #include #include #include long r[26]; int main() { memset(r, -1, sizeof(r)); r[2] = open("/dev/kvm", 0); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); struct kvm_irqfd ifd; ifd.fd = syscall(SYS_eventfd2, 5, 0); ifd.gsi = 3; ifd.flags = 2; ifd.resamplefd = ifd.fd; r[25] = ioctl(r[3], KVM_IRQFD, &ifd); return 0; } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- virt/kvm/irqchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f0b08a2a48ba3f..7d31d8c5b9ea84 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -40,7 +40,7 @@ int kvm_irq_map_gsi(struct kvm *kvm, irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, lockdep_is_held(&kvm->irq_lock)); - if (gsi < irq_rt->nr_rt_entries) { + if (irq_rt && gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; ++n; From 1f4b75078c205b34f3fcfd04098d61b77e044e68 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 9 Jun 2016 11:32:14 +0530 Subject: [PATCH 032/433] ALSA: hda - Add PCI ID for Kabylake commit 35639a0e98391036a4c7f23253c321d6621a8897 upstream. Kabylake shows up as PCI ID 0xa171. And Kabylake-LP as 0x9d71. Since these are similar to Skylake add these to SKL_PLUS macro Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 411630e9c034ae..1475440b70aa11 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -359,8 +359,11 @@ enum { #define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171) +#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ + IS_KBL(pci) || IS_KBL_LP(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2204,6 +2207,12 @@ static const struct pci_device_id azx_ids[] = { /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake */ + { PCI_DEVICE(0x8086, 0xa171), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake-LP */ + { PCI_DEVICE(0x8086, 0x9d71), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, From 1bf80a48ff88552644ede5c953321d667808fa1d Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Fri, 3 Jun 2016 14:45:25 +0800 Subject: [PATCH 033/433] ALSA: hda - Fix headset mic detection problem for Dell machine commit f90d83b301701026b2e4c437a3613f377f63290e upstream. Add the pin configuration value of this machine into the pin_quirk table to make DELL1_MIC_NO_PRESENCE apply to this machine. Signed-off-by: AceLan Kao Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d53c25e7a1c194..cd395c0623e5f6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5775,6 +5775,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170130}, {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5565", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60180}, + {0x14, 0x90170120}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, From c3fd646bb8c4faab68fed0e751d0e4f623b5dbab Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 15:58:28 +0800 Subject: [PATCH 034/433] ALSA: hda/realtek - ALC256 speaker noise issue commit e69e7e03ed225abf3e1c43545aa3bcb68dc81d5f upstream. That is some different register for ALC255 and ALC256. ALC256 can't fit with some ALC255 register. This issue is cause from LDO output voltage control. This patch is updated the right LDO register value. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 52 +++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cd395c0623e5f6..13bbfdfa2fb0ad 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3618,13 +3618,20 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, static void alc_headset_mode_unplugged(struct hda_codec *codec) { static struct coef_fw coef0255[] = { - WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/ WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */ WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */ {} }; + static struct coef_fw coef0255_1[] = { + WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ + {} + }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */ + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x1b, 0x0c0b), WRITE_COEF(0x45, 0xc429), @@ -3677,7 +3684,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: + alc_process_coef_fw(codec, coef0255_1); + alc_process_coef_fw(codec, coef0255); + break; case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3896,6 +3907,12 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xd429), WRITE_COEF(0x1b, 0x0c2b), @@ -3936,9 +3953,11 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -3978,6 +3997,12 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xe429), WRITE_COEF(0x1b, 0x0c2b), @@ -4018,9 +4043,11 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -4266,7 +4293,7 @@ static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, static void alc255_set_default_jack_type(struct hda_codec *codec) { /* Set to iphone type */ - static struct coef_fw fw[] = { + static struct coef_fw alc255fw[] = { WRITE_COEF(0x1b, 0x880b), WRITE_COEF(0x45, 0xd089), WRITE_COEF(0x1b, 0x080b), @@ -4274,7 +4301,22 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) WRITE_COEF(0x1b, 0x0c0b), {} }; - alc_process_coef_fw(codec, fw); + static struct coef_fw alc256fw[] = { + WRITE_COEF(0x1b, 0x884b), + WRITE_COEF(0x45, 0xd089), + WRITE_COEF(0x1b, 0x084b), + WRITE_COEF(0x46, 0x0004), + WRITE_COEF(0x1b, 0x0c4b), + {} + }; + switch (codec->core.vendor_id) { + case 0x10ec0255: + alc_process_coef_fw(codec, alc255fw); + break; + case 0x10ec0256: + alc_process_coef_fw(codec, alc256fw); + break; + } msleep(30); } From 81999107ce6de995fc532d4fed23a5189ce18eac Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 16:44:20 +0800 Subject: [PATCH 035/433] ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703 commit 6fbae35a3170c3e2b1b9d7b9cc943cbe48771362 upstream. Support new codecs for ALC700/ALC701/ALC703. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 13bbfdfa2fb0ad..49d581aed7f99e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -346,6 +346,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0234: case 0x10ec0274: case 0x10ec0294: + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -2655,6 +2658,7 @@ enum { ALC269_TYPE_ALC256, ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, + ALC269_TYPE_ALC700, }; /* @@ -2686,6 +2690,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC256: case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: + case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; default: @@ -6099,6 +6104,14 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0294: spec->codec_variant = ALC269_TYPE_ALC294; break; + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: + spec->codec_variant = ALC269_TYPE_ALC700; + spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ + alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */ + break; + } if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) { @@ -7054,6 +7067,9 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0670, "ALC670", patch_alc662), HDA_CODEC_ENTRY(0x10ec0671, "ALC671", patch_alc662), HDA_CODEC_ENTRY(0x10ec0680, "ALC680", patch_alc680), + HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc882), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), From da7f1c92042c25048a53b0eaf716fc66aaac62a9 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Tue, 7 Jun 2016 13:14:21 +0200 Subject: [PATCH 036/433] ALSA: hda/realtek: Add T560 docking unit fixup commit dab38e43b298501a4e8807b56117c029e2e98383 upstream. Tested with Lenovo Ultradock. Fixes the non-working headphone jack on the docking unit. Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 49d581aed7f99e..0fe18ede3e8555 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5634,6 +5634,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), From 9edd6fd1eb92ebd61c84975855335922db632ea5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 May 2016 23:14:56 +0100 Subject: [PATCH 037/433] ARM: fix PTRACE_SETVFPREGS on SMP systems commit e2dfb4b880146bfd4b6aa8e138c0205407cebbaf upstream. PTRACE_SETVFPREGS fails to properly mark the VFP register set to be reloaded, because it undoes one of the effects of vfp_flush_hwstate(). Specifically vfp_flush_hwstate() sets thread->vfpstate.hard.cpu to an invalid CPU number, but vfp_set() overwrites this with the original CPU number, thereby rendering the hardware state as apparently "valid", even though the software state is more recent. Fix this by reverting the previous change. Fixes: 8130b9d7b9d8 ("ARM: 7308/1: vfp: flush thread hwstate before copying ptrace registers") Acked-by: Will Deacon Tested-by: Simon Marchi Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ef9119f7462ea1..4d9375814b538e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -733,8 +733,8 @@ static int vfp_set(struct task_struct *target, if (ret) return ret; - vfp_flush_hwstate(thread); thread->vfpstate.hard = new_vfp; + vfp_flush_hwstate(thread); return 0; } From e1c35534e3684e25053f5caf6e032956894e8b1f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 17:22:17 +0100 Subject: [PATCH 038/433] gpio: bcm-kona: fix bcm_kona_gpio_reset() warnings commit b66b2a0adf0e48973b582e055758b9907a7eee7c upstream. The bcm_kona_gpio_reset() calls bcm_kona_gpio_write_lock_regs() with what looks like the wrong parameter. The write_lock_regs function takes a pointer to the registers, not the bcm_kona_gpio structure. Fix the warning, and probably bug by changing the function to pass reg_base instead of kona_gpio, fixing the following warning: drivers/gpio/gpio-bcm-kona.c:550:47: warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *reg_base got struct bcm_kona_gpio *kona_gpio warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *reg_base got struct bcm_kona_gpio *kona_gpio Signed-off-by: Ben Dooks Acked-by: Ray Jui Reviewed-by: Markus Mayer Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-bcm-kona.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 33a1f9779b86bb..4ea71d505bcefc 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -551,11 +551,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio) /* disable interrupts and clear status */ for (i = 0; i < kona_gpio->num_bank; i++) { /* Unlock the entire bank first */ - bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE); + bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE); writel(0xffffffff, reg_base + GPIO_INT_MASK(i)); writel(0xffffffff, reg_base + GPIO_INT_STATUS(i)); /* Now re-lock the bank */ - bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE); + bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE); } } From ebf529182a6da5fcbecf0305b19de4e0cba048fb Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 11 May 2016 21:13:13 +0200 Subject: [PATCH 039/433] s390/bpf: fix recache skb->data/hlen for skb_vlan_push/pop commit 6edf0aa4f8bbdfbb4d6d786892fa02728d05dc36 upstream. In case of usage of skb_vlan_push/pop, in the prologue we store the SKB pointer on the stack and restore it after BPF_JMP_CALL to skb_vlan_push/pop. Unfortunately currently there are two bugs in the code: 1) The wrong stack slot (offset 170 instead of 176) is used 2) The wrong register (W1 instead of B1) is saved So fix this and use correct stack slot and register. Fixes: 9db7f2b81880 ("s390/bpf: recache skb->data/hlen for skb_vlan_push/pop") Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit.h | 4 ++-- arch/s390/net/bpf_jit_comp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index f010c93a88b16c..fda605dbc1b44b 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -37,7 +37,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * | | | * +---------------+ | * | 8 byte skbp | | - * R15+170 -> +---------------+ | + * R15+176 -> +---------------+ | * | 8 byte hlen | | * R15+168 -> +---------------+ | * | 4 byte align | | @@ -58,7 +58,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; #define STK_OFF (STK_SPACE - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ -#define STK_OFF_SKBP 170 /* Offset of SKB pointer on stack */ +#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */ #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e53670..fe607c8d25d236 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -446,7 +446,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) emit_load_skb_data_hlen(jit); if (jit->seen & SEEN_SKB_CHANGE) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, + EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ if (is_classic) { From 9be2fa205c9e1337a68847ae38e791bd6b17cc42 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 12 May 2016 18:10:48 +0200 Subject: [PATCH 040/433] s390/bpf: reduce maximum program size to 64 KB commit 0fa963553a5c28d8f8aabd8878326d3f782045fc upstream. The s390 BFP compiler currently uses relative branch instructions that only support jumps up to 64 KB. Examples are "j", "jnz", "cgrj", etc. Currently the maximum size of s390 BPF programs is set to 0x7ffff. If branches over 64 KB are generated the, kernel can crash due to incorrect code. So fix this an reduce the maximum size to 64 KB. Programs larger than that will be interpreted. Fixes: ce2b6ad9c185 ("s390/bpf: increase BPF_SIZE_MAX") Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fe607c8d25d236..0e2919dd8df368 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -45,7 +45,7 @@ struct bpf_jit { int labels[1]; /* Labels for local jumps */ }; -#define BPF_SIZE_MAX 0x7ffff /* Max size for program (20 bit signed displ) */ +#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ #define SEEN_SKB 1 /* skb access */ #define SEEN_MEM 2 /* use mem[] for temporary storage */ From f32ef5c8e9e847706a3ef96791e14b207914d9e3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 Jun 2016 09:00:28 +0100 Subject: [PATCH 041/433] irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask commit dd5f1b049dc139876801db3cdd0f20d21fd428cc upstream. The INTID mask is wrong, and is made a signed value, which has nteresting effects in the KVM emulation. Let's sanitize it. Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d5d798b35c1f6c..e98425058f20dc 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -301,7 +301,7 @@ #define ICC_SGI1R_AFFINITY_1_SHIFT 16 #define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_SGI_ID_SHIFT 24 -#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) #define ICC_SGI1R_AFFINITY_2_SHIFT 32 #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 From b440f3ae617bd81a5699bbba5120c8f54c456f81 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 May 2016 16:55:56 +0200 Subject: [PATCH 042/433] crypto: public_key: select CRYPTO_AKCIPHER commit bad6a185b4d6f81d0ed2b6e4c16307969f160b95 upstream. In some rare randconfig builds, we can end up with ASYMMETRIC_PUBLIC_KEY_SUBTYPE enabled but CRYPTO_AKCIPHER disabled, which fails to link because of the reference to crypto_alloc_akcipher: crypto/built-in.o: In function `public_key_verify_signature': :(.text+0x110e4): undefined reference to `crypto_alloc_akcipher' This adds a Kconfig 'select' statement to ensure the dependency is always there. Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 4870f28403f545..05bfe568cd30c2 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -14,6 +14,7 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE select MPILIB select PUBLIC_KEY_ALGO_RSA select CRYPTO_HASH_INFO + select CRYPTO_AKCIPHER help This option provides support for asymmetric public key type handling. If signature generation and/or verification are to be used, From 774920eece6e15f6560ba0ad5a9b25eb43d075fe Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 May 2016 17:33:03 -0500 Subject: [PATCH 043/433] crypto: ccp - Fix AES XTS error for request sizes above 4096 commit ab6a11a7c8ef47f996974dd3c648c2c0b1a36ab1 upstream. The ccp-crypto module for AES XTS support has a bug that can allow requests greater than 4096 bytes in size to be passed to the CCP hardware. The CCP hardware does not support request sizes larger than 4096, resulting in incorrect output. The request should actually be handled by the fallback mechanism instantiated by the ccp-crypto module. Add a check to insure the request size is less than or equal to the maximum supported size and use the fallback mechanism if it is not. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-xts.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 52c7395cb8d8b7..0d0d4529ee3601 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -122,6 +122,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; + u32 unit_size; int ret; if (!ctx->u.aes.key_len) @@ -133,11 +134,17 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if (!req->info) return -EINVAL; - for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) - if (!(req->nbytes & (unit_size_map[unit].size - 1))) - break; + unit_size = CCP_XTS_AES_UNIT_SIZE__LAST; + if (req->nbytes <= unit_size_map[0].size) { + for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) { + if (!(req->nbytes & (unit_size_map[unit].size - 1))) { + unit_size = unit_size_map[unit].value; + break; + } + } + } - if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) || + if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { /* Use the fallback to process the request for any * unsupported unit sizes or key sizes @@ -158,7 +165,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; - rctx->cmd.u.xts.unit_size = unit_size_map[unit].value; + rctx->cmd.u.xts.unit_size = unit_size; rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; rctx->cmd.u.xts.iv = &rctx->iv_sg; From d0bc1f47b8a7eee50b10035c645f6e6d7e719a62 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 31 May 2016 15:55:03 +0100 Subject: [PATCH 044/433] arm64: Provide "model name" in /proc/cpuinfo for PER_LINUX32 tasks commit e47b020a323d1b2a7b1e9aac86e99eae19463630 upstream. This patch brings the PER_LINUX32 /proc/cpuinfo format more in line with the 32-bit ARM one by providing an additional line: model name : ARMv8 Processor rev X (v8l) Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/elf.h | 4 ++-- arch/arm64/kernel/cpuinfo.c | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index faad6df49e5b00..bc6492b9a92414 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -156,14 +156,14 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) #endif -#ifdef CONFIG_COMPAT - #ifdef __AARCH64EB__ #define COMPAT_ELF_PLATFORM ("v8b") #else #define COMPAT_ELF_PLATFORM ("v8l") #endif +#ifdef CONFIG_COMPAT + #define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) /* AArch32 registers. */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index a5f2340396167d..0166cfbc866c01 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -22,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -102,6 +104,7 @@ static const char *const compat_hwcap2_str[] = { static int c_show(struct seq_file *m, void *v) { int i, j; + bool compat = personality(current->personality) == PER_LINUX32; for_each_online_cpu(i) { struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); @@ -113,6 +116,9 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); + if (compat) + seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n", + MIDR_REVISION(midr), COMPAT_ELF_PLATFORM); seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000UL/HZ), @@ -125,7 +131,7 @@ static int c_show(struct seq_file *m, void *v) * software which does already (at least for 32-bit). */ seq_puts(m, "Features\t:"); - if (personality(current->personality) == PER_LINUX32) { + if (compat) { #ifdef CONFIG_COMPAT for (j = 0; compat_hwcap_str[j]; j++) if (compat_elf_hwcap & (1 << j)) From 5e8b53a4db947494f1d808469a411f7f2f8bb3ca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:55:15 +0100 Subject: [PATCH 045/433] arm64: mm: always take dirty state from new pte in ptep_set_access_flags commit 0106d456c4cb1770253fefc0ab23c9ca760b43f7 upstream. Commit 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") ensured that pte flags are updated atomically in the face of potential concurrent, hardware-assisted updates. However, Alex reports that: | This patch breaks swapping for me. | In the broken case, you'll see either systemd cpu time spike (because | it's stuck in a page fault loop) or the system hang (because the | application owning the screen is stuck in a page fault loop). It turns out that this is because the 'dirty' argument to ptep_set_access_flags is always 0 for read faults, and so we can't use it to set PTE_RDONLY. The failing sequence is: 1. We put down a PTE_WRITE | PTE_DIRTY | PTE_AF pte 2. Memory pressure -> pte_mkold(pte) -> clear PTE_AF 3. A read faults due to the missing access flag 4. ptep_set_access_flags is called with dirty = 0, due to the read fault 5. pte is then made PTE_WRITE | PTE_DIRTY | PTE_AF | PTE_RDONLY (!) 6. A write faults, but pte_write is true so we get stuck The solution is to check the new page table entry (as would be done by the generic, non-atomic definition of ptep_set_access_flags that just calls set_pte_at) to establish the dirty state. Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Reviewed-by: Catalin Marinas Reported-by: Alexander Graf Tested-by: Alexander Graf Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 40f5522245a2b2..4c1a118c1d096e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * PTE_RDONLY is cleared by default in the asm below, so set it in * back if necessary (read-only or clean PTE). */ - if (!pte_write(entry) || !dirty) + if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* From baa6dfd627b460f73a8b79fd4cf87ea31e690e36 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 7 Apr 2016 16:28:26 +1000 Subject: [PATCH 046/433] powerpc/pseries/eeh: Handle RTAS delay requests in configure_bridge commit 871e178e0f2c4fa788f694721a10b4758d494ce1 upstream. In the "ibm,configure-pe" and "ibm,configure-bridge" RTAS calls, the spec states that values of 9900-9905 can be returned, indicating that software should delay for 10^x (where x is the last digit, i.e. 990x) milliseconds and attempt the call again. Currently, the kernel doesn't know about this, and respecting it fixes some PCI failures when the hypervisor is busy. The delay is capped at 0.2 seconds. Signed-off-by: Russell Currey Acked-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/eeh_pseries.c | 51 ++++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ac3ffd97e05966..405baaf9686416 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -615,29 +615,50 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { int config_addr; int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; /* Figure out the PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - /* Use new configure-pe function, if supported */ - if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - } else { - return -EFAULT; - } + while (max_wait > 0) { + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else { + return -EFAULT; + } - if (ret) - pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, ret); + if (!ret) + return ret; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); + } + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, ret); return ret; } From 4f27ca0e25df7b317983a1c8a1febecbeae81813 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 May 2016 13:26:44 +0200 Subject: [PATCH 047/433] powerpc: Fix definition of SIAR and SDAR registers commit d23fac2b27d94aeb7b65536a50d32bfdc21fe01e upstream. The SIAR and SDAR registers are available twice, one time as SPRs 780 / 781 (unprivileged, but read-only), and one time as the SPRs 796 / 797 (privileged, but read and write). The Linux kernel code currently uses the unprivileged SPRs - while this is OK for reading, writing to that register of course does not work. Since the KVM code tries to write to this register, too (see the mtspr in book3s_hv_rmhandlers.S), the contents of this register sometimes get lost for the guests, e.g. during migration of a VM. To fix this issue, simply switch to the privileged SPR numbers instead. Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2220f7a60def31..93d69bd260e75c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -744,13 +744,13 @@ #define SPRN_PMC6 792 #define SPRN_PMC7 793 #define SPRN_PMC8 794 -#define SPRN_SIAR 780 -#define SPRN_SDAR 781 #define SPRN_SIER 784 #define SIER_SIPR 0x2000000 /* Sampled MSR_PR */ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_SIAR 796 +#define SPRN_SDAR 797 #define SPRN_TACR 888 #define SPRN_TCSCR 889 #define SPRN_CSIGR 890 From cac2863ff3e64221f8888da9fcf72080181e91a8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 May 2016 13:29:11 +0200 Subject: [PATCH 048/433] powerpc: Use privileged SPR number for MMCR2 commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream. We are already using the privileged versions of MMCR0, MMCR1 and MMCRA in the kernel, so for MMCR2, we should better use the privileged versions, too, to be consistent. Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8") Suggested-by: Paul Mackerras Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 93d69bd260e75c..070fa855205179 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -707,7 +707,7 @@ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 -#define SPRN_MMCR2 769 +#define SPRN_MMCR2 785 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL From a976f62a601a763ea37d116d5a9009a2eec9d0f3 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 31 May 2016 07:51:17 +0200 Subject: [PATCH 049/433] powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call commit 7cc851039d643a2ee7df4d18177150f2c3a484f5 upstream. If we do not provide the PVR for POWER8NVL, a guest on this system currently ends up in PowerISA 2.06 compatibility mode on KVM, since QEMU does not provide a generic PowerISA 2.07 mode yet. So some new instructions from POWER8 (like "mtvsrd") get disabled for the guest, resulting in crashes when using code compiled explicitly for POWER8 (e.g. with the "-mcpu=power8" option of GCC). Fixes: ddee09c099c3 ("powerpc: Add PVR for POWER8NVL processor") Signed-off-by: Thomas Huth Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 92dea8df6b26d1..e52b82b71d798f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -655,6 +655,7 @@ unsigned char ibm_architecture_vec[] = { W(0xffff0000), W(0x003e0000), /* POWER6 */ W(0xffff0000), W(0x003f0000), /* POWER7 */ W(0xffff0000), W(0x004b0000), /* POWER8E */ + W(0xffff0000), W(0x004c0000), /* POWER8NVL */ W(0xffff0000), W(0x004d0000), /* POWER8 */ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ From b5ff1d6012726f539723c24712bb71976ee2bc77 Mon Sep 17 00:00:00 2001 From: "hongkun.cao" Date: Sat, 21 May 2016 15:23:39 +0800 Subject: [PATCH 050/433] pinctrl: mediatek: fix dual-edge code defect commit 5edf673d07fdcb6498be24914f3f38f8d8843199 upstream. When a dual-edge irq is triggered, an incorrect irq will be reported on condition that the external signal is not stable and this incorrect irq has been registered. Correct the register offset. Signed-off-by: Hongkun Cao Reviewed-by: Matthias Brugger Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 3d8019eb3d8476..181b35879ebd7d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1191,9 +1191,10 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) const struct mtk_desc_pin *pin; chained_irq_enter(chip, desc); - for (eint_num = 0; eint_num < pctl->devdata->ap_num; eint_num += 32) { + for (eint_num = 0; + eint_num < pctl->devdata->ap_num; + eint_num += 32, reg += 4) { status = readl(reg); - reg += 4; while (status) { offset = __ffs(status); index = eint_num + offset; From 1125f3b09513831b6863a1ed78fb0d1750105bfc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Jun 2016 17:21:33 +0200 Subject: [PATCH 051/433] parisc: Fix pagefault crash in unaligned __get_user() call commit 8b78f260887df532da529f225c49195d18fef36b upstream. One of the debian buildd servers had this crash in the syslog without any other information: Unaligned handler failed, ret = -2 clock_adjtime (pid 22578): Unaligned data reference (code 28) CPU: 1 PID: 22578 Comm: clock_adjtime Tainted: G E 4.5.0-2-parisc64-smp #1 Debian 4.5.4-1 task: 000000007d9960f8 ti: 00000001bde7c000 task.ti: 00000001bde7c000 YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI PSW: 00001000000001001111100000001111 Tainted: G E r00-03 000000ff0804f80f 00000001bde7c2b0 00000000402d2be8 00000001bde7c2b0 r04-07 00000000409e1fd0 00000000fa6f7fff 00000001bde7c148 00000000fa6f7fff r08-11 0000000000000000 00000000ffffffff 00000000fac9bb7b 000000000002b4d4 r12-15 000000000015241c 000000000015242c 000000000000002d 00000000fac9bb7b r16-19 0000000000028800 0000000000000001 0000000000000070 00000001bde7c218 r20-23 0000000000000000 00000001bde7c210 0000000000000002 0000000000000000 r24-27 0000000000000000 0000000000000000 00000001bde7c148 00000000409e1fd0 r28-31 0000000000000001 00000001bde7c320 00000001bde7c350 00000001bde7c218 sr00-03 0000000001200000 0000000001200000 0000000000000000 0000000001200000 sr04-07 0000000000000000 0000000000000000 0000000000000000 0000000000000000 IASQ: 0000000000000000 0000000000000000 IAOQ: 00000000402d2e84 00000000402d2e88 IIR: 0ca0d089 ISR: 0000000001200000 IOR: 00000000fa6f7fff CPU: 1 CR30: 00000001bde7c000 CR31: ffffffffffffffff ORIG_R28: 00000002369fe628 IAOQ[0]: compat_get_timex+0x2dc/0x3c0 IAOQ[1]: compat_get_timex+0x2e0/0x3c0 RP(r2): compat_get_timex+0x40/0x3c0 Backtrace: [<00000000402d4608>] compat_SyS_clock_adjtime+0x40/0xc0 [<0000000040205024>] syscall_exit+0x0/0x14 This means the userspace program clock_adjtime called the clock_adjtime() syscall and then crashed inside the compat_get_timex() function. Syscalls should never crash programs, but instead return EFAULT. The IIR register contains the executed instruction, which disassebles into "ldw 0(sr3,r5),r9". This load-word instruction is part of __get_user() which tried to read the word at %r5/IOR (0xfa6f7fff). This means the unaligned handler jumped in. The unaligned handler is able to emulate all ldw instructions, but it fails if it fails to read the source e.g. because of page fault. The following program reproduces the problem: #define _GNU_SOURCE #include #include #include int main(void) { /* allocate 8k */ char *ptr = mmap(NULL, 2*4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); /* free second half (upper 4k) and make it invalid. */ munmap(ptr+4096, 4096); /* syscall where first int is unaligned and clobbers into invalid memory region */ /* syscall should return EFAULT */ return syscall(__NR_clock_adjtime, 0, ptr+4095); } To fix this issue we simply need to check if the faulting instruction address is in the exception fixup table when the unaligned handler failed. If it is, call the fixup routine instead of crashing. While looking at the unaligned handler I found another issue as well: The target register should not be modified if the handler was unsuccessful. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/unaligned.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index d7c0acb35ec248..8d49614d600d88 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -666,7 +666,7 @@ void handle_unaligned(struct pt_regs *regs) break; } - if (modify && R1(regs->iir)) + if (ret == 0 && modify && R1(regs->iir)) regs->gr[R1(regs->iir)] = newbase; @@ -677,6 +677,14 @@ void handle_unaligned(struct pt_regs *regs) if (ret) { + /* + * The unaligned handler failed. + * If we were called by __get_user() or __put_user() jump + * to it's exception fixup handler instead of crashing. + */ + if (!user_mode(regs) && fixup_exception(regs)) + return; + printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret); die_if_kernel("Unaligned data reference", regs, 28); From d3f97524ef1b2b12df6669a701e66c02f1da523d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jun 2016 14:55:44 -0700 Subject: [PATCH 052/433] memcg: add RCU locking around css_for_each_descendant_pre() in memcg_offline_kmem() commit 3a06bb78ceeceacc86a1e31133a7944013f9775b upstream. memcg_offline_kmem() may be called from memcg_free_kmem() after a css init failure. memcg_free_kmem() is a ->css_free callback which is called without cgroup_mutex and memcg_offline_kmem() ends up using css_for_each_descendant_pre() without any locking. Fix it by adding rcu read locking around it. mkdir: cannot create directory `65530': No space left on device =============================== [ INFO: suspicious RCU usage. ] 4.6.0-work+ #321 Not tainted ------------------------------- kernel/cgroup.c:4008 cgroup_mutex or RCU read lock required! [ 527.243970] other info that might help us debug this: [ 527.244715] rcu_scheduler_active = 1, debug_locks = 0 2 locks held by kworker/0:5/1664: #0: ("cgroup_destroy"){.+.+..}, at: [] process_one_work+0x165/0x4a0 #1: ((&css->destroy_work)#3){+.+...}, at: [] process_one_work+0x165/0x4a0 [ 527.248098] stack backtrace: CPU: 0 PID: 1664 Comm: kworker/0:5 Not tainted 4.6.0-work+ #321 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Workqueue: cgroup_destroy css_free_work_fn Call Trace: dump_stack+0x68/0xa1 lockdep_rcu_suspicious+0xd7/0x110 css_next_descendant_pre+0x7d/0xb0 memcg_offline_kmem.part.44+0x4a/0xc0 mem_cgroup_css_free+0x1ec/0x200 css_free_work_fn+0x49/0x5e0 process_one_work+0x1c5/0x4a0 worker_thread+0x49/0x490 kthread+0xea/0x100 ret_from_fork+0x1f/0x40 Link: http://lkml.kernel.org/r/20160526203018.GG23194@mtj.duckdns.org Signed-off-by: Tejun Heo Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ba4dd988e2e49..67648e6b2ac85e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3661,6 +3661,7 @@ static void memcg_deactivate_kmem(struct mem_cgroup *memcg) * ordering is imposed by list_lru_node->lock taken by * memcg_drain_all_list_lrus(). */ + rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */ css_for_each_descendant_pre(css, &memcg->css) { child = mem_cgroup_from_css(css); BUG_ON(child->kmemcg_id != kmemcg_id); @@ -3668,6 +3669,8 @@ static void memcg_deactivate_kmem(struct mem_cgroup *memcg) if (!memcg->use_hierarchy) break; } + rcu_read_unlock(); + memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id); memcg_free_cache_id(kmemcg_id); From dea2cf7c0c6e42ccb1eea2baba028163597bcf22 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:06 +0200 Subject: [PATCH 053/433] ecryptfs: forbid opening files without mmap handler commit 2f36db71009304b3f0b95afacd8eba1f9f046b87 upstream. This prevents users from triggering a stack overflow through a recursive invocation of pagefault handling that involves mapping procfs files into virtual memory. Signed-off-by: Jann Horn Acked-by: Tyler Hicks Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/kthread.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 866bb18efefea9..e818f5ac7a2692 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ecryptfs_kernel.h" struct ecryptfs_open_req { @@ -147,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file, flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR; (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) - goto out; + goto have_file; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; @@ -165,8 +166,16 @@ int ecryptfs_privileged_open(struct file **lower_file, mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); wait_for_completion(&req.done); - if (IS_ERR(*lower_file)) + if (IS_ERR(*lower_file)) { rc = PTR_ERR(*lower_file); + goto out; + } +have_file: + if ((*lower_file)->f_op->mmap == NULL) { + fput(*lower_file); + *lower_file = NULL; + rc = -EMEDIUMTYPE; + } out: return rc; } From 47648b5862145187fc8273de0b5330bb9968feb3 Mon Sep 17 00:00:00 2001 From: Prasun Maiti Date: Mon, 6 Jun 2016 20:04:19 +0530 Subject: [PATCH 054/433] wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel commit 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724 upstream. iwpriv app uses iw_point structure to send data to Kernel. The iw_point structure holds a pointer. For compatibility Kernel converts the pointer as required for WEXT IOCTLs (SIOCIWFIRST to SIOCIWLAST). Some drivers may use iw_handler_def.private_args to populate iwpriv commands instead of iw_handler_def.private. For those case, the IOCTLs from SIOCIWFIRSTPRIV to SIOCIWLASTPRIV will follow the path ndo_do_ioctl(). Accordingly when the filled up iw_point structure comes from 32 bit iwpriv to 64 bit Kernel, Kernel will not convert the pointer and sends it to driver. So, the driver may get the invalid data. The pointer conversion for the IOCTLs (SIOCIWFIRSTPRIV to SIOCIWLASTPRIV), which follow the path ndo_do_ioctl(), is mandatory. This patch adds pointer conversion from 32 bit to 64 bit and vice versa, if the ioctl comes from 32 bit iwpriv to 64 bit Kernel. Signed-off-by: Prasun Maiti Signed-off-by: Ujjal Roy Tested-by: Dibyajyoti Ghosh Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/wext-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index b50ee5d622e14d..c753211cb83f8b 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -955,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } From 035a94d8d1acdb829575a987222a6d5c45e8a5f2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:54:04 -0700 Subject: [PATCH 055/433] x86/entry/traps: Don't force in_interrupt() to return true in IST handlers commit aaee8c3c5cce2d9107310dd9f3026b4f901d441c upstream. Forcing in_interrupt() to return true if we're not in a bona fide interrupt confuses the softirq code. This fixes warnings like: NOHZ: local_softirq_pending 282 ... which can happen when running things like selftests/x86. This will change perf's static percpu buffer usage in IST context. I think this is okay, and it's changing the behavior to match historical (pre-4.0) behavior. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 959274753857 ("x86, traps: Track entry into and exit from IST context") Link: http://lkml.kernel.org/r/cdc215f94d118d691d73df35275022331156fb45.1464130360.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/traps.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ade185a46b1da6..679302c312f8ee 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -109,6 +109,12 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } +/* + * In IST context, we explicitly disable preemption. This serves two + * purposes: it makes it much less likely that we would accidentally + * schedule in IST context and it will force a warning if we somehow + * manage to schedule by accident. + */ void ist_enter(struct pt_regs *regs) { if (user_mode(regs)) { @@ -123,13 +129,7 @@ void ist_enter(struct pt_regs *regs) rcu_nmi_enter(); } - /* - * We are atomic because we're on the IST stack; or we're on - * x86_32, in which case we still shouldn't schedule; or we're - * on x86_64 and entered from user mode, in which case we're - * still atomic unless ist_begin_non_atomic is called. - */ - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); @@ -137,7 +137,7 @@ void ist_enter(struct pt_regs *regs) void ist_exit(struct pt_regs *regs) { - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); if (!user_mode(regs)) rcu_nmi_exit(); @@ -168,7 +168,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) BUG_ON((unsigned long)(current_top_of_stack() - current_stack_pointer()) >= THREAD_SIZE); - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); } /** @@ -178,7 +178,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) */ void ist_end_non_atomic(void) { - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); } static nokprobe_inline int From 9beb96b344c846779f67d1be1cdafc66562b94ec Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:05 +0200 Subject: [PATCH 056/433] proc: prevent stacking filesystems on top commit e54ad7f1ee263ffa5a2de9c609d58dfa27b21cd9 upstream. This prevents stacking filesystems (ecryptfs and overlayfs) from using procfs as lower filesystem. There is too much magic going on inside procfs, and there is no good reason to stack stuff on top of procfs. (For example, procfs does access checks in VFS open handlers, and ecryptfs by design calls open handlers from a kernel thread that doesn't drop privileges or so.) Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/root.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/proc/root.c b/fs/proc/root.c index 361ab4ee42fc36..ec649c92d27088 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) return ERR_CAST(sb); + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + if (!proc_parse_options(options, ns)) { deactivate_locked_super(sb); return ERR_PTR(-EINVAL); From c08b1a593a042ae01e788ec5504bee2cfc83e1f2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:07 +0200 Subject: [PATCH 057/433] sched: panic on corrupted stack end commit 29d6455178a09e1dc340380c582b13356227e8df upstream. Until now, hitting this BUG_ON caused a recursive oops (because oops handling involves do_exit(), which calls into the scheduler, which in turn raises an oops), which caused stuff below the stack to be overwritten until a panic happened (e.g. via an oops in interrupt context, caused by the overwritten CPU index in the thread_info). Just panic directly. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 55bebf924946b9..6c0cdb5a73f807 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3008,7 +3008,8 @@ static noinline void __schedule_bug(struct task_struct *prev) static inline void schedule_debug(struct task_struct *prev) { #ifdef CONFIG_SCHED_STACK_END_CHECK - BUG_ON(task_stack_end_corrupted(prev)); + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); #endif if (unlikely(in_atomic_preempt_off())) { From 2b11d80e1aa70b56c6431e4dc3c686ffc61a73bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:26:55 -0400 Subject: [PATCH 058/433] fix d_walk()/non-delayed __d_free() race commit 3d56c25e3bb0726a5c5e16fc2d9e38f8ed763085 upstream. Ascend-to-parent logics in d_walk() depends on all encountered child dentries not getting freed without an RCU delay. Unfortunately, in quite a few cases it is not true, with hard-to-hit oopsable race as the result. Fortunately, the fix is simiple; right now the rule is "if it ever been hashed, freeing must be delayed" and changing it to "if it ever had a parent, freeing must be delayed" closes that hole and covers all cases the old rule used to cover. Moreover, pipes and sockets remain _not_ covered, so we do not introduce RCU delay in the cases which are the reason for having that delay conditional in the first place. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 18effa378f9728..108d7d810be3ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1618,7 +1618,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -2413,7 +2413,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2632,6 +2631,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); From c9bc125c922e855055cd08b2ec064180218be161 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 18 Jan 2016 06:32:30 -0500 Subject: [PATCH 059/433] sparc: Fix system call tracing register handling. [ Upstream commit 1a40b95374f680625318ab61d81958e949e0afe3 ] A system call trace trigger on entry allows the tracing process to inspect and potentially change the traced process's registers. Account for that by reloading the %g1 (syscall number) and %i0-%i5 (syscall argument) values. We need to be careful to revalidate the range of %g1, and reload the system call table entry it corresponds to into %l7. Reported-by: Mike Frysinger Signed-off-by: David S. Miller Tested-by: Mike Frysinger Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/entry.S | 17 +++++++++++++++++ arch/sparc/kernel/syscalls.S | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 33c02b15f47859..a83707c83be803 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index bb0008927598b1..c4a1b5c40e4efc 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 From e9c74337a7c03d33f2afd5bb341cc20ad209698c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Apr 2016 17:27:37 -0400 Subject: [PATCH 060/433] sparc64: Fix bootup regressions on some Kconfig combinations. [ Upstream commit 49fa5230462f9f2c4e97c81356473a6bdf06c422 ] The system call tracing bug fix mentioned in the Fixes tag below increased the amount of assembler code in the sequence of assembler files included by head_64.S This caused to total set of code to exceed 0x4000 bytes in size, which overflows the expression in head_64.S that works to place swapper_tsb at address 0x408000. When this is violated, the TSB is not properly aligned, and also the trap table is not aligned properly either. All of this together results in failed boots. So, do two things: 1) Simplify some code by using ba,a instead of ba/nop to get those bytes back. 2) Add a linker script assertion to make sure that if this happens again the build will fail. Fixes: 1a40b95374f6 ("sparc: Fix system call tracing register handling.") Reported-by: Meelis Roos Reported-by: Joerg Abraham Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/cherrs.S | 14 +++++--------- arch/sparc/kernel/fpu_traps.S | 11 +++++------ arch/sparc/kernel/head_64.S | 24 ++++++++---------------- arch/sparc/kernel/misctrap.S | 12 ++++-------- arch/sparc/kernel/spiterrs.S | 18 ++++++------------ arch/sparc/kernel/utrap.S | 3 +-- arch/sparc/kernel/vmlinux.lds.S | 4 ++++ arch/sparc/kernel/winfixup.S | 3 +-- 8 files changed, 34 insertions(+), 55 deletions(-) diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S index 4ee1ad420862d4..655628def68e6b 100644 --- a/arch/sparc/kernel/cherrs.S +++ b/arch/sparc/kernel/cherrs.S @@ -214,8 +214,7 @@ do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ subcc %g1, %g2, %g1 ! Next cacheline bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_dcpe_tl1_fatal: sethi %hi(1f), %g7 @@ -224,8 +223,7 @@ do_dcpe_tl1_fatal: mov 0x2, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_dcpe_tl1,.-do_dcpe_tl1 .globl do_icpe_tl1 @@ -259,8 +257,7 @@ do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ subcc %g1, %g2, %g1 bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_icpe_tl1_fatal: sethi %hi(1f), %g7 @@ -269,8 +266,7 @@ do_icpe_tl1_fatal: mov 0x3, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_icpe_tl1,.-do_icpe_tl1 .type dcpe_icpe_tl1_common,#function @@ -456,7 +452,7 @@ __cheetah_log_error: cmp %g2, 0x63 be c_cee nop - ba,pt %xcc, c_deferred + ba,a,pt %xcc, c_deferred .size __cheetah_log_error,.-__cheetah_log_error /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S index a6864826a4bd96..336d2750fe78c3 100644 --- a/arch/sparc/kernel/fpu_traps.S +++ b/arch/sparc/kernel/fpu_traps.S @@ -100,8 +100,8 @@ do_fpdis: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 - b,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -144,8 +144,8 @@ do_fpdis: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 - ba,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 @@ -197,8 +197,7 @@ fpdis_exit2: fp_other_bounce: call do_fpother add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size fp_other_bounce,.-fp_other_bounce .align 32 diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index f2d30cab5b3f38..51faf92ace001c 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -461,9 +461,8 @@ sun4v_chip_type: subcc %g3, 1, %g3 bne,pt %xcc, 41b add %g1, 1, %g1 - mov SUN4V_CHIP_SPARC64X, %g4 ba,pt %xcc, 5f - nop + mov SUN4V_CHIP_SPARC64X, %g4 49: mov SUN4V_CHIP_UNKNOWN, %g4 @@ -548,8 +547,7 @@ sun4u_init: stxa %g0, [%g7] ASI_DMMU membar #Sync - ba,pt %xcc, sun4u_continue - nop + ba,a,pt %xcc, sun4u_continue sun4v_init: /* Set ctx 0 */ @@ -560,14 +558,12 @@ sun4v_init: mov SECONDARY_CONTEXT, %g7 stxa %g0, [%g7] ASI_MMU membar #Sync - ba,pt %xcc, niagara_tlb_fixup - nop + ba,a,pt %xcc, niagara_tlb_fixup sun4u_continue: BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) - ba,pt %xcc, spitfire_tlb_fixup - nop + ba,a,pt %xcc, spitfire_tlb_fixup niagara_tlb_fixup: mov 3, %g2 /* Set TLB type to hypervisor. */ @@ -639,8 +635,7 @@ niagara_patch: call hypervisor_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done cheetah_tlb_fixup: mov 2, %g2 /* Set TLB type to cheetah+. */ @@ -659,8 +654,7 @@ cheetah_tlb_fixup: call cheetah_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done spitfire_tlb_fixup: /* Set TLB type to spitfire. */ @@ -782,8 +776,7 @@ setup_trap_table: call %o1 add %sp, (2047 + 128), %o0 - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f 1: sethi %hi(sparc64_ttable_tl0), %o0 set prom_set_trap_table_name, %g2 @@ -822,8 +815,7 @@ setup_trap_table: BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f /* Disable STICK_INT interrupts. */ 1: diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 753b4f031bfb71..34b4933900bf76 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S @@ -18,8 +18,7 @@ __do_privact: 109: or %g7, %lo(109b), %g7 call do_privact add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __do_privact,.-__do_privact .type do_mna,#function @@ -46,8 +45,7 @@ do_mna: mov %l5, %o2 call mem_address_unaligned add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_mna,.-do_mna .type do_lddfmna,#function @@ -65,8 +63,7 @@ do_lddfmna: mov %l5, %o2 call handle_lddfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_lddfmna,.-do_lddfmna .type do_stdfmna,#function @@ -84,8 +81,7 @@ do_stdfmna: mov %l5, %o2 call handle_stdfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index c357e40ffd0152..4a73009f66a572 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -85,8 +85,7 @@ __spitfire_cee_trap_continue: ba,pt %xcc, etraptl1 rd %pc, %g7 - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f 1: ba,pt %xcc, etrap_irq rd %pc, %g7 @@ -100,8 +99,7 @@ __spitfire_cee_trap_continue: mov %l5, %o2 call spitfire_access_error add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_access_error,.-__spitfire_access_error /* This is the trap handler entry point for ECC correctable @@ -179,8 +177,7 @@ __spitfire_data_access_exception_tl1: mov %l5, %o2 call spitfire_data_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception_tl1,.-__spitfire_data_access_exception_tl1 .type __spitfire_data_access_exception,#function @@ -200,8 +197,7 @@ __spitfire_data_access_exception: mov %l5, %o2 call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception,.-__spitfire_data_access_exception .type __spitfire_insn_access_exception_tl1,#function @@ -220,8 +216,7 @@ __spitfire_insn_access_exception_tl1: mov %l5, %o2 call spitfire_insn_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception_tl1,.-__spitfire_insn_access_exception_tl1 .type __spitfire_insn_access_exception,#function @@ -240,6 +235,5 @@ __spitfire_insn_access_exception: mov %l5, %o2 call spitfire_insn_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception,.-__spitfire_insn_access_exception diff --git a/arch/sparc/kernel/utrap.S b/arch/sparc/kernel/utrap.S index b7f0f3f3a909b0..c731e8023d3e7c 100644 --- a/arch/sparc/kernel/utrap.S +++ b/arch/sparc/kernel/utrap.S @@ -11,8 +11,7 @@ utrap_trap: /* %g3=handler,%g4=level */ mov %l4, %o1 call bad_trap add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap invoke_utrap: sllx %g3, 3, %g3 diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index f1a2f688b28a31..4a41d412dd3dda 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -33,6 +33,10 @@ ENTRY(_start) jiffies = jiffies_64; #endif +#ifdef CONFIG_SPARC64 +ASSERT((swapper_tsb == 0x0000000000408000), "Error: sparc64 early assembler too large") +#endif + SECTIONS { #ifdef CONFIG_SPARC64 diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 1e67ce95836972..855019a8590ea5 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S @@ -32,8 +32,7 @@ fill_fixup: rd %pc, %g7 call do_sparc64_fault add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap /* Be very careful about usage of the trap globals here. * You cannot touch %g5 as that has the fault information. From 4185bd68ef40b275669efec624c0d792cd7a2acf Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 5 Jan 2016 22:35:35 -0800 Subject: [PATCH 061/433] sparc64: Fix numa node distance initialization [ Upstream commit 36beca6571c941b28b0798667608239731f9bc3a ] Orabug: 22495713 Currently, NUMA node distance matrix is initialized only when a machine descriptor (MD) exists. However, sun4u machines (e.g. Sun Blade 2500) do not have an MD and thus distance values were left uninitialized. The initialization is now moved such that it happens on both sun4u and sun4v. Signed-off-by: Nitin Gupta Tested-by: Mikael Pettersson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/init_64.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3025bd57f7abba..6f216853f2724f 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1267,13 +1267,6 @@ static int __init numa_parse_mdesc(void) int i, j, err, count; u64 node; - /* Some sane defaults for numa latency values */ - for (i = 0; i < MAX_NUMNODES; i++) { - for (j = 0; j < MAX_NUMNODES; j++) - numa_latency[i][j] = (i == j) ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - } - node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); if (node == MDESC_NODE_NULL) { mdesc_release(md); @@ -1369,10 +1362,18 @@ static int __init numa_parse_sun4u(void) static int __init bootmem_init_numa(void) { + int i, j; int err = -1; numadbg("bootmem_init_numa()\n"); + /* Some sane defaults for numa latency values */ + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) + numa_latency[i][j] = (i == j) ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + } + if (numa_enabled) { if (tlb_type == hypervisor) err = numa_parse_mdesc(); From b1206090828584bcb1caf4c850d175f297eb7bc8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2016 00:25:32 -0500 Subject: [PATCH 062/433] sparc64: Fix sparc64_set_context stack handling. [ Upstream commit 397d1533b6cce0ccb5379542e2e6d079f6936c46 ] Like a signal return, we should use synchronize_user_stack() rather than flush_user_windows(). Reported-by: Ilya Malakhov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/signal_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d88beff47bab3e..39aaec173f66eb 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) From ccd02310db44df820d1e8c54a97daf596dea1c9e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 24 Mar 2016 13:02:22 -0700 Subject: [PATCH 063/433] sparc/PCI: Fix for panic while enabling SR-IOV [ Upstream commit d0c31e02005764dae0aab130a57e9794d06b824d ] We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [00000000104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [00000000104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [0000000000725f14] local_pci_probe+0x34/0xa0 [0000000000726028] pci_call_probe+0xa8/0xe0 [0000000000726310] pci_device_probe+0x50/0x80 [000000000079f700] really_probe+0x140/0x420 [000000000079fa24] driver_probe_device+0x44/0xa0 [000000000079fb5c] __device_attach+0x3c/0x60 [000000000079d85c] bus_for_each_drv+0x5c/0xa0 [000000000079f588] device_attach+0x88/0xc0 [000000000071acd0] pci_bus_add_device+0x30/0x80 [0000000000736090] virtfn_add.clone.1+0x210/0x360 [00000000007364a4] sriov_enable+0x2c4/0x520 [000000000073672c] pci_enable_sriov+0x2c/0x40 [00000000104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [00000000104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[00000000104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[00000000104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[0000000000725f14]: local_pci_probe+0x34/0xa0 Caller[0000000000726028]: pci_call_probe+0xa8/0xe0 Caller[0000000000726310]: pci_device_probe+0x50/0x80 Caller[000000000079f700]: really_probe+0x140/0x420 Caller[000000000079fa24]: driver_probe_device+0x44/0xa0 Caller[000000000079fb5c]: __device_attach+0x3c/0x60 Caller[000000000079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[000000000079f588]: device_attach+0x88/0xc0 Caller[000000000071acd0]: pci_bus_add_device+0x30/0x80 Caller[0000000000736090]: virtfn_add.clone.1+0x210/0x360 Caller[00000000007364a4]: sriov_enable+0x2c4/0x520 Caller[000000000073672c]: pci_enable_sriov+0x2c/0x40 Caller[00000000104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[00000000104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[00000000104c5f90]: __mlx4_init_one+0x410/0x500 [mlx4_core] Caller[00000000104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[0000000000725f14]: local_pci_probe+0x34/0xa0 Caller[0000000000726028]: pci_call_probe+0xa8/0xe0 Caller[0000000000726310]: pci_device_probe+0x50/0x80 Caller[000000000079f700]: really_probe+0x140/0x420 Caller[000000000079fa24]: driver_probe_device+0x44/0xa0 Caller[000000000079fb08]: __driver_attach+0x88/0xa0 Caller[000000000079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[000000000079f29c]: driver_attach+0x1c/0x40 Caller[000000000079e35c]: bus_add_driver+0x17c/0x220 Caller[00000000007a02d4]: driver_register+0x74/0x120 Caller[00000000007263fc]: __pci_register_driver+0x3c/0x60 Caller[00000000104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. Signed-off-by: Babu Moger Signed-off-by: Sowmini Varadhan Reviewed-by: Ethan Zhao Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index badf0951d73c8f..9f9614df9e1e54 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -994,6 +994,23 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } +#ifdef CONFIG_PCI_IOV +int pcibios_add_device(struct pci_dev *dev) +{ + struct pci_dev *pdev; + + /* Add sriov arch specific initialization here. + * Copy dev_archdata from PF to VF + */ + if (dev->is_virtfn) { + pdev = dev->physfn; + memcpy(&dev->dev.archdata, &pdev->dev.archdata, + sizeof(struct dev_archdata)); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static int __init pcibios_init(void) { pci_dfl_cache_line_size = 64 >> 2; From 87575e31be28afb08665f412ac269909c5911a33 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Wed, 30 Mar 2016 11:17:13 -0700 Subject: [PATCH 064/433] sparc64: Reduce TLB flushes during hugepte changes [ Upstream commit 24e49ee3d76b70853a96520e46b8837e5eae65b2 ] During hugepage map/unmap, TSB and TLB flushes are currently issued at every PAGE_SIZE'd boundary which is unnecessary. We now issue the flush at REAL_HPAGE_SIZE boundaries only. Without this patch workloads which unmap a large hugepage backed VMA region get CPU lockups due to excessive TLB flush calls. Orabug: 22365539, 22643230, 22995196 Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/pgtable_64.h | 43 +++++++++++++++++++++------- arch/sparc/include/asm/tlbflush_64.h | 3 +- arch/sparc/mm/hugetlbpage.c | 33 +++++++++++++++++---- arch/sparc/mm/init_64.c | 12 -------- arch/sparc/mm/tlb.c | 25 +++++++++++----- arch/sparc/mm/tsb.c | 32 +++++++++++---------- 6 files changed, 97 insertions(+), 51 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 131d36fcd07a60..408b715c95a5ca 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) #define pgprot_noncached pgprot_noncached #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline pte_t pte_mkhuge(pte_t pte) +static inline unsigned long __pte_huge_mask(void) { unsigned long mask; @@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte) : "=r" (mask) : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V)); - return __pte(pte_val(pte) | mask); + return mask; +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return __pte(pte_val(pte) | __pte_huge_mask()); +} + +static inline bool is_hugetlb_pte(pte_t pte) +{ + return !!(pte_val(pte) & __pte_huge_mask()); } + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline pmd_t pmd_mkhuge(pmd_t pmd) { @@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return __pmd(pte_val(pte)); } #endif +#else +static inline bool is_hugetlb_pte(pte_t pte) +{ + return false; +} #endif static inline pte_t pte_mkdirty(pte_t pte) @@ -865,6 +881,19 @@ static inline unsigned long pud_pfn(pud_t pud) void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm); +static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, + pte_t *ptep, pte_t orig, int fullmm) +{ + /* It is more efficient to let flush_tlb_kernel_range() + * handle init_mm tlb flushes. + * + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. + */ + if (likely(mm != &init_mm) && pte_accessible(mm, orig)) + tlb_batch_add(mm, vaddr, ptep, orig, fullmm); +} + #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, @@ -881,15 +910,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t orig = *ptep; *ptep = pte; - - /* It is more efficient to let flush_tlb_kernel_range() - * handle init_mm tlb flushes. - * - * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U - * and SUN4V pte layout, so this inline test is fine. - */ - if (likely(mm != &init_mm) && pte_accessible(mm, orig)) - tlb_batch_add(mm, addr, ptep, orig, fullmm); + maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm); } #define set_pte_at(mm,addr,ptep,pte) \ diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index dea1cfa2122bec..a8e192e907003d 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -8,6 +8,7 @@ #define TLB_BATCH_NR 192 struct tlb_batch { + bool huge; struct mm_struct *mm; unsigned long tlb_nr; unsigned long active; @@ -16,7 +17,7 @@ struct tlb_batch { void flush_tsb_kernel_range(unsigned long start, unsigned long end); void flush_tsb_user(struct tlb_batch *tb); -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge); /* TLB flush operations. */ diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 131eaf4ad7f598..364d093f46c6bf 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { int i; + pte_t orig[2]; + unsigned long nptes; if (!pte_present(*ptep) && pte_present(entry)) mm->context.huge_pte_count++; addr &= HPAGE_MASK; - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte_at(mm, addr, ptep, entry); + + nptes = 1 << HUGETLB_PAGE_ORDER; + orig[0] = *ptep; + orig[1] = *(ptep + nptes / 2); + for (i = 0; i < nptes; i++) { + *ptep = entry; ptep++; addr += PAGE_SIZE; pte_val(entry) += PAGE_SIZE; } + + /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0); + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, @@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, { pte_t entry; int i; + unsigned long nptes; entry = *ptep; if (pte_present(entry)) mm->context.huge_pte_count--; addr &= HPAGE_MASK; - - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(mm, addr, ptep); + nptes = 1 << HUGETLB_PAGE_ORDER; + for (i = 0; i < nptes; i++) { + *ptep = __pte(0UL); addr += PAGE_SIZE; ptep++; } + /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, entry, 0); + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, entry, 0); + return entry; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6f216853f2724f..76d11d1c5995b4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde tsb_insert(tsb, tag, tte); } -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline bool is_hugetlb_pte(pte_t pte) -{ - if ((tlb_type == hypervisor && - (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || - (tlb_type != hypervisor && - (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) - return true; - return false; -} -#endif - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm; diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 9df2190c097e1b..f81cd973670079 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void) } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, - bool exec) + bool exec, bool huge) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); unsigned long nr; @@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, } if (!tb->active) { - flush_tsb_user_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr, huge); global_flush_tlb_page(mm, vaddr); goto out; } - if (nr == 0) + if (nr == 0) { tb->mm = mm; + tb->huge = huge; + } + + if (tb->huge != huge) { + flush_tlb_pending(); + tb->huge = huge; + nr = 0; + } tb->vaddrs[nr] = vaddr; tb->tlb_nr = ++nr; @@ -104,6 +112,8 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm) { + bool huge = is_hugetlb_pte(orig); + if (tlb_type != hypervisor && pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); @@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, no_cache_flush: if (!fullmm) - tlb_batch_add_one(mm, vaddr, pte_exec(orig)); + tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, if (pte_val(*pte) & _PAGE_VALID) { bool exec = pte_exec(*pte); - tlb_batch_add_one(mm, vaddr, exec); + tlb_batch_add_one(mm, vaddr, exec, false); } pte++; vaddr += PAGE_SIZE; @@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pte_t orig_pte = __pte(pmd_val(orig)); bool exec = pte_exec(orig_pte); - tlb_batch_add_one(mm, addr, exec); - tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + tlb_batch_add_one(mm, addr, exec, true); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec, + true); } else { tlb_batch_pmd_scan(mm, addr, orig); } diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index a06576683c38a0..a0604a493a361e 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -76,14 +76,15 @@ void flush_tsb_user(struct tlb_batch *tb) spin_lock_irqsave(&mm->context.lock, flags); - base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; - nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; - if (tlb_type == cheetah_plus || tlb_type == hypervisor) - base = __pa(base); - __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); - + if (!tb->huge) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) @@ -94,20 +95,21 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge) { unsigned long nentries, base, flags; spin_lock_irqsave(&mm->context.lock, flags); - base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; - nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; - if (tlb_type == cheetah_plus || tlb_type == hypervisor) - base = __pa(base); - __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); - + if (!huge) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) From 6bb3290ce9662055efcf13dc18c12bb62f6f39dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 May 2016 12:51:20 -0700 Subject: [PATCH 065/433] sparc64: Take ctx_alloc_lock properly in hugetlb_setup(). [ Upstream commit 9ea46abe22550e3366ff7cee2f8391b35b12f730 ] On cheetahplus chips we take the ctx_alloc_lock in order to modify the TLB lookup parameters for the indexed TLBs, which are stored in the context register. This is called with interrupts disabled, however ctx_alloc_lock is an IRQ safe lock, therefore we must take acquire/release it properly with spin_{lock,unlock}_irq(). Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/init_64.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 76d11d1c5995b4..3c4b8975fa76ac 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2821,9 +2821,10 @@ void hugetlb_setup(struct pt_regs *regs) * the Data-TLB for huge pages. */ if (tlb_type == cheetah_plus) { + bool need_context_reload = false; unsigned long ctx; - spin_lock(&ctx_alloc_lock); + spin_lock_irq(&ctx_alloc_lock); ctx = mm->context.sparc64_ctx_val; ctx &= ~CTX_PGSZ_MASK; ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; @@ -2842,9 +2843,12 @@ void hugetlb_setup(struct pt_regs *regs) * also executing in this address space. */ mm->context.sparc64_ctx_val = ctx; - on_each_cpu(context_reload, mm, 0); + need_context_reload = true; } - spin_unlock(&ctx_alloc_lock); + spin_unlock_irq(&ctx_alloc_lock); + + if (need_context_reload) + on_each_cpu(context_reload, mm, 0); } } #endif From 1fda90c39d8ef6acbedfd3cd9bd710a5bcc490c3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 28 May 2016 21:21:31 -0700 Subject: [PATCH 066/433] sparc: Harden signal return frame checks. [ Upstream commit d11c2a0de2824395656cf8ed15811580c9dd38aa ] All signal frames must be at least 16-byte aligned, because that is the alignment we explicitly create when we build signal return stack frames. All stack pointers must be at least 8-byte aligned. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/signal32.c | 46 ++++++++++++++++++++++------------ arch/sparc/kernel/signal_32.c | 41 +++++++++++++++++++----------- arch/sparc/kernel/signal_64.c | 31 +++++++++++++++-------- arch/sparc/kernel/sigutil_32.c | 9 ++++++- arch/sparc/kernel/sigutil_64.c | 10 ++++++-- 5 files changed, 92 insertions(+), 45 deletions(-) diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 4eed773a77358b..77655f0f0fc7ea 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return 0; } +/* Checks if the fp is valid. We always build signal frames which are + * 16-byte aligned, therefore we can always enforce that the restore + * frame has that property as well. + */ +static bool invalid_frame_pointer(void __user *fp, int fplen) +{ + if ((((unsigned long) fp) & 15) || + ((unsigned long)fp) > 0x100000000ULL - fplen) + return true; + return false; +} + void do_sigreturn32(struct pt_regs *regs) { struct signal_frame32 __user *sf; compat_uptr_t fpu_save; compat_uptr_t rwin_save; - unsigned int psr; + unsigned int psr, ufp; unsigned pc, npc; sigset_t set; compat_sigset_t seta; @@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs) sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 3)) + if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv; + + if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) goto segv; - if (get_user(pc, &sf->info.si_regs.pc) || + if (__get_user(pc, &sf->info.si_regs.pc) || __get_user(npc, &sf->info.si_regs.npc)) goto segv; @@ -227,7 +244,7 @@ void do_sigreturn32(struct pt_regs *regs) asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) { struct rt_signal_frame32 __user *sf; - unsigned int psr, pc, npc; + unsigned int psr, pc, npc, ufp; compat_uptr_t fpu_save; compat_uptr_t rwin_save; sigset_t set; @@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 3)) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; - if (get_user(pc, &sf->regs.pc) || + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) + goto segv; + + if (__get_user(pc, &sf->regs.pc) || __get_user(npc, &sf->regs.npc)) goto segv; @@ -307,14 +329,6 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp, int fplen) -{ - if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen) - return 1; - return 0; -} - static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 52aa5e4ce5e77c..c3c12efe0bc004 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -60,10 +60,22 @@ struct rt_signal_frame { #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7))) #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7))) +/* Checks if the fp is valid. We always build signal frames which are + * 16-byte aligned, therefore we can always enforce that the restore + * frame has that property as well. + */ +static inline bool invalid_frame_pointer(void __user *fp, int fplen) +{ + if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen)) + return true; + + return false; +} + asmlinkage void do_sigreturn(struct pt_regs *regs) { + unsigned long up_psr, pc, npc, ufp; struct signal_frame __user *sf; - unsigned long up_psr, pc, npc; sigset_t set; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; @@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) + if (!invalid_frame_pointer(sf, sizeof(*sf))) + goto segv_and_exit; + + if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) goto segv_and_exit; - if (((unsigned long) sf) & 3) + if (ufp & 0x7) goto segv_and_exit; err = __get_user(pc, &sf->info.si_regs.pc); @@ -127,7 +142,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) asmlinkage void do_rt_sigreturn(struct pt_regs *regs) { struct rt_signal_frame __user *sf; - unsigned int psr, pc, npc; + unsigned int psr, pc, npc, ufp; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; sigset_t set; @@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 0x03)) + if (!invalid_frame_pointer(sf, sizeof(*sf))) + goto segv; + + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) goto segv; err = __get_user(pc, &sf->regs.pc); @@ -178,15 +198,6 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static inline int invalid_frame_pointer(void __user *fp, int fplen) -{ - if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen)) - return 1; - - return 0; -} - static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP]; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 39aaec173f66eb..5ee930c48f4c95 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -234,6 +234,17 @@ asmlinkage void sparc64_get_context(struct pt_regs *regs) goto out; } +/* Checks if the fp is valid. We always build rt signal frames which + * are 16-byte aligned, therefore we can always enforce that the + * restore frame has that property as well. + */ +static bool invalid_frame_pointer(void __user *fp) +{ + if (((unsigned long) fp) & 15) + return true; + return false; +} + struct rt_signal_frame { struct sparc_stackf ss; siginfo_t info; @@ -246,8 +257,8 @@ struct rt_signal_frame { void do_rt_sigreturn(struct pt_regs *regs) { + unsigned long tpc, tnpc, tstate, ufp; struct rt_signal_frame __user *sf; - unsigned long tpc, tnpc, tstate; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; sigset_t set; @@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *regs) (regs->u_regs [UREG_FP] + STACK_BIAS); /* 1. Make sure we are not getting garbage from the user */ - if (((unsigned long) sf) & 3) + if (invalid_frame_pointer(sf)) + goto segv; + + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) goto segv; - err = get_user(tpc, &sf->regs.tpc); + if ((ufp + STACK_BIAS) & 0x7) + goto segv; + + err = __get_user(tpc, &sf->regs.tpc); err |= __get_user(tnpc, &sf->regs.tnpc); if (test_thread_flag(TIF_32BIT)) { tpc &= 0xffffffff; @@ -308,14 +325,6 @@ void do_rt_sigreturn(struct pt_regs *regs) force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp) -{ - if (((unsigned long) fp) & 15) - return 1; - return 0; -} - static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c index 0f6eebe71e6c91..e5fe8cef9a699a 100644 --- a/arch/sparc/kernel/sigutil_32.c +++ b/arch/sparc/kernel/sigutil_32.c @@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) { int err; + + if (((unsigned long) fpu) & 3) + return -EFAULT; + #ifdef CONFIG_SMP if (test_tsk_thread_flag(current, TIF_USEDFPU)) regs->psr &= ~PSR_EF; @@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp) struct thread_info *t = current_thread_info(); int i, wsaved, err; - __get_user(wsaved, &rp->wsaved); + if (((unsigned long) rp) & 3) + return -EFAULT; + + get_user(wsaved, &rp->wsaved); if (wsaved > NSWINS) return -EFAULT; diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c index 387834a9c56a51..36aadcbeac6946 100644 --- a/arch/sparc/kernel/sigutil_64.c +++ b/arch/sparc/kernel/sigutil_64.c @@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) unsigned long fprs; int err; - err = __get_user(fprs, &fpu->si_fprs); + if (((unsigned long) fpu) & 7) + return -EFAULT; + + err = get_user(fprs, &fpu->si_fprs); fprs_write(0); regs->tstate &= ~TSTATE_PEF; if (fprs & FPRS_DL) @@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp) struct thread_info *t = current_thread_info(); int i, wsaved, err; - __get_user(wsaved, &rp->wsaved); + if (((unsigned long) rp) & 7) + return -EFAULT; + + get_user(wsaved, &rp->wsaved); if (wsaved > NSWINS) return -EFAULT; From 561e4453dd06f56cd8a61ced33964189b3651558 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 28 May 2016 20:41:12 -0700 Subject: [PATCH 067/433] sparc64: Fix return from trap window fill crashes. [ Upstream commit 7cafc0b8bf130f038b0ec2dcdd6a9de6dc59b65a ] We must handle data access exception as well as memory address unaligned exceptions from return from trap window fill faults, not just normal TLB misses. Otherwise we can get an OOPS that looks like this: ld-linux.so.2(36808): Kernel bad sw trap 5 [#1] CPU: 1 PID: 36808 Comm: ld-linux.so.2 Not tainted 4.6.0 #34 task: fff8000303be5c60 ti: fff8000301344000 task.ti: fff8000301344000 TSTATE: 0000004410001601 TPC: 0000000000a1a784 TNPC: 0000000000a1a788 Y: 00000002 Not tainted TPC: g0: fff8000024fc8248 g1: 0000000000db04dc g2: 0000000000000000 g3: 0000000000000001 g4: fff8000303be5c60 g5: fff800030e672000 g6: fff8000301344000 g7: 0000000000000001 o0: 0000000000b95ee8 o1: 000000000000012b o2: 0000000000000000 o3: 0000000200b9b358 o4: 0000000000000000 o5: fff8000301344040 sp: fff80003013475c1 ret_pc: 0000000000a1a77c RPC: l0: 00000000000007ff l1: 0000000000000000 l2: 000000000000005f l3: 0000000000000000 l4: fff8000301347e98 l5: fff8000024ff3060 l6: 0000000000000000 l7: 0000000000000000 i0: fff8000301347f60 i1: 0000000000102400 i2: 0000000000000000 i3: 0000000000000000 i4: 0000000000000000 i5: 0000000000000000 i6: fff80003013476a1 i7: 0000000000404d4c I7: Call Trace: [0000000000404d4c] user_rtt_fill_fixup+0x6c/0x7c The window trap handlers are slightly clever, the trap table entries for them are composed of two pieces of code. First comes the code that actually performs the window fill or spill trap handling, and then there are three instructions at the end which are for exception processing. The userland register window fill handler is: add %sp, STACK_BIAS + 0x00, %g1; \ ldxa [%g1 + %g0] ASI, %l0; \ mov 0x08, %g2; \ mov 0x10, %g3; \ ldxa [%g1 + %g2] ASI, %l1; \ mov 0x18, %g5; \ ldxa [%g1 + %g3] ASI, %l2; \ ldxa [%g1 + %g5] ASI, %l3; \ add %g1, 0x20, %g1; \ ldxa [%g1 + %g0] ASI, %l4; \ ldxa [%g1 + %g2] ASI, %l5; \ ldxa [%g1 + %g3] ASI, %l6; \ ldxa [%g1 + %g5] ASI, %l7; \ add %g1, 0x20, %g1; \ ldxa [%g1 + %g0] ASI, %i0; \ ldxa [%g1 + %g2] ASI, %i1; \ ldxa [%g1 + %g3] ASI, %i2; \ ldxa [%g1 + %g5] ASI, %i3; \ add %g1, 0x20, %g1; \ ldxa [%g1 + %g0] ASI, %i4; \ ldxa [%g1 + %g2] ASI, %i5; \ ldxa [%g1 + %g3] ASI, %i6; \ ldxa [%g1 + %g5] ASI, %i7; \ restored; \ retry; nop; nop; nop; nop; \ b,a,pt %xcc, fill_fixup_dax; \ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; And the way this works is that if any of those memory accesses generate an exception, the exception handler can revector to one of those final three branch instructions depending upon which kind of exception the memory access took. In this way, the fault handler doesn't have to know if it was a spill or a fill that it's handling the fault for. It just always branches to the last instruction in the parent trap's handler. For example, for a regular fault, the code goes: winfix_trampoline: rdpr %tpc, %g3 or %g3, 0x7c, %g3 wrpr %g3, %tnpc done All window trap handlers are 0x80 aligned, so if we "or" 0x7c into the trap time program counter, we'll get that final instruction in the trap handler. On return from trap, we have to pull the register window in but we do this by hand instead of just executing a "restore" instruction for several reasons. The largest being that from Niagara and onward we simply don't have enough levels in the trap stack to fully resolve all possible exception cases of a window fault when we are already at trap level 1 (which we enter to get ready to return from the original trap). This is executed inline via the FILL_*_RTRAP handlers. rtrap_64.S's code branches directly to these to do the window fill by hand if necessary. Now if you look at them, we'll see at the end: ba,a,pt %xcc, user_rtt_fill_fixup; ba,a,pt %xcc, user_rtt_fill_fixup; ba,a,pt %xcc, user_rtt_fill_fixup; And oops, all three cases are handled like a fault. This doesn't work because each of these trap types (data access exception, memory address unaligned, and faults) store their auxiliary info in different registers to pass on to the C handler which does the real work. So in the case where the stack was unaligned, the unaligned trap handler sets up the arg registers one way, and then we branched to the fault handler which expects them setup another way. So the FAULT_TYPE_* value ends up basically being garbage, and randomly would generate the backtrace seen above. Reported-by: Nick Alcock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/head_64.h | 4 ++ arch/sparc/include/asm/ttable.h | 8 +-- arch/sparc/kernel/Makefile | 1 + arch/sparc/kernel/rtrap_64.S | 57 +++---------------- arch/sparc/kernel/urtt_fill.S | 98 ++++++++++++++++++++++++++++++++ 5 files changed, 116 insertions(+), 52 deletions(-) create mode 100644 arch/sparc/kernel/urtt_fill.S diff --git a/arch/sparc/include/asm/head_64.h b/arch/sparc/include/asm/head_64.h index 10e9dabc4c4133..f0700cfeedd7b2 100644 --- a/arch/sparc/include/asm/head_64.h +++ b/arch/sparc/include/asm/head_64.h @@ -15,6 +15,10 @@ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ) +#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) +#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) +#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) + #define __CHEETAH_ID 0x003e0014 #define __JALAPENO_ID 0x003e0016 #define __SERRANO_ID 0x003e0022 diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h index 71b5a67522abb2..781b9f1dbdc2d2 100644 --- a/arch/sparc/include/asm/ttable.h +++ b/arch/sparc/include/asm/ttable.h @@ -589,8 +589,8 @@ user_rtt_fill_64bit: \ restored; \ nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup_dax; \ + ba,a,pt %xcc, user_rtt_fill_fixup_mna; \ ba,a,pt %xcc, user_rtt_fill_fixup; @@ -652,8 +652,8 @@ user_rtt_fill_32bit: \ restored; \ nop; nop; nop; nop; nop; \ nop; nop; nop; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup_dax; \ + ba,a,pt %xcc, user_rtt_fill_fixup_mna; \ ba,a,pt %xcc, user_rtt_fill_fixup; diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 7cf9c6ea3f1f21..fdb13327fded36 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg CFLAGS_REMOVE_pcr.o := -pg endif +obj-$(CONFIG_SPARC64) += urtt_fill.o obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o obj-$(CONFIG_SPARC32) += etrap_32.o obj-$(CONFIG_SPARC32) += rtrap_32.o diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index d08bdaffdbfccb..216948ca43829d 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -14,10 +14,6 @@ #include #include -#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) -#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) -#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) - #ifdef CONFIG_CONTEXT_TRACKING # define SCHEDULE_USER schedule_user #else @@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit -user_rtt_fill_fixup: - rdpr %cwp, %g1 - add %g1, 1, %g1 - wrpr %g1, 0x0, %cwp - - rdpr %wstate, %g2 - sll %g2, 3, %g2 - wrpr %g2, 0x0, %wstate - - /* We know %canrestore and %otherwin are both zero. */ - - sethi %hi(sparc64_kern_pri_context), %g2 - ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - -661: stxa %g2, [%g1] ASI_DMMU - .section .sun4v_1insn_patch, "ax" - .word 661b - stxa %g2, [%g1] ASI_MMU - .previous - - sethi %hi(KERNBASE), %g1 - flush %g1 +user_rtt_fill_fixup_dax: + ba,pt %xcc, user_rtt_fill_fixup_common + mov 1, %g3 - or %g4, FAULT_CODE_WINFIXUP, %g4 - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] +user_rtt_fill_fixup_mna: + ba,pt %xcc, user_rtt_fill_fixup_common + mov 2, %g3 - mov %g6, %l1 - wrpr %g0, 0x0, %tl - -661: nop - .section .sun4v_1insn_patch, "ax" - .word 661b - SET_GL(0) - .previous - - wrpr %g0, RTRAP_PSTATE, %pstate - - mov %l1, %g6 - ldx [%g6 + TI_TASK], %g4 - LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop +user_rtt_fill_fixup: + ba,pt %xcc, user_rtt_fill_fixup_common + clr %g3 user_rtt_pre_restore: add %g1, 1, %g1 diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S new file mode 100644 index 00000000000000..5604a2b051d46b --- /dev/null +++ b/arch/sparc/kernel/urtt_fill.S @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include + + .text + .align 8 + .globl user_rtt_fill_fixup_common +user_rtt_fill_fixup_common: + rdpr %cwp, %g1 + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + + rdpr %wstate, %g2 + sll %g2, 3, %g2 + wrpr %g2, 0x0, %wstate + + /* We know %canrestore and %otherwin are both zero. */ + + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + sethi %hi(KERNBASE), %g1 + flush %g1 + + mov %g4, %l4 + mov %g5, %l5 + brnz,pn %g3, 1f + mov %g3, %l3 + + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] +1: + mov %g6, %l1 + wrpr %g0, 0x0, %tl + +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + wrpr %g0, RTRAP_PSTATE, %pstate + + mov %l1, %g6 + ldx [%g6 + TI_TASK], %g4 + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) + + brnz,pn %l3, 1f + nop + + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +1: cmp %g3, 2 + bne,pn %xcc, 2f + nop + + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + mov %l4, %o2 + call sun4v_do_mna + mov %l5, %o1 + ba,a,pt %xcc, rtrap +1: mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned + nop + ba,a,pt %xcc, rtrap + +2: sethi %hi(tlb_type), %g1 + mov %l4, %o1 + lduw [%g1 + %lo(tlb_type)], %g1 + mov %l5, %o2 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + call sun4v_data_access_exception + nop + ba,a,pt %xcc, rtrap + +1: call spitfire_data_access_exception + nop + ba,a,pt %xcc, rtrap From ccb85835a159923d7f79fd51cfd614962426ecf2 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 4 Feb 2016 01:24:40 +0100 Subject: [PATCH 068/433] MIPS: Fix 64k page support for 32 bit kernels. commit d7de413475f443957a0c1d256e405d19b3a2cb22 upstream. TASK_SIZE was defined as 0x7fff8000UL which for 64k pages is not a multiple of the page size. Somewhere further down the math fails such that executing an ELF binary fails. Signed-off-by: Ralf Baechle Tested-by: Joshua Henderson Cc: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 3f832c3dd8f5f1..041153f5cf9343 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -45,7 +45,7 @@ extern unsigned int vced_count, vcei_count; * User space process size: 2GB. This is hardcoded into a few places, * so don't change it unless you know what you are doing. */ -#define TASK_SIZE 0x7fff8000UL +#define TASK_SIZE 0x80000000UL #endif #define STACK_TOP_MAX TASK_SIZE From 59ff9f9b38b39afeb167fdc16c52976587b2c45a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:49 +0100 Subject: [PATCH 069/433] netfilter: x_tables: validate e->target_offset early commit bdf533de6968e9686df777dc178486f600c6e617 upstream. We should check that e->target_offset is sane before mark_source_chains gets called since it will fetch the target entry for loop detection. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 17 ++++++++--------- net/ipv4/netfilter/ip_tables.c | 17 ++++++++--------- net/ipv6/netfilter/ip6_tables.c | 17 ++++++++--------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 11dccba474b796..82b969b3567a0e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -474,14 +474,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e) { const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); + if (!arp_checkentry(&e->arp)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -522,10 +520,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -576,6 +570,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { @@ -590,6 +585,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1246,7 +1245,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + ret = check_entry((struct arpt_entry *)e); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b99affad6ba1f4..ae55f23a851ce7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -569,14 +569,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e) { const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + if (!ip_checkentry(&e->ip)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -666,10 +664,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -741,6 +735,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { @@ -755,6 +750,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1506,7 +1505,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + ret = check_entry((struct ipt_entry *)e); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 99425cf2819b83..88221aba46391b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -581,14 +581,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -679,10 +677,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -753,6 +747,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { @@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1518,7 +1517,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; From 868fe2536f8741ebf807ed717734e6c321c478e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:50 +0100 Subject: [PATCH 070/433] netfilter: x_tables: make sure e->next_offset covers remaining blob size commit 6e94e0cfb0887e4013b3b930fa6ab1fe6bb6ba91 upstream. Otherwise this function may read data beyond the ruleset blob. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 6 ++++-- net/ipv4/netfilter/ip_tables.c | 6 ++++-- net/ipv6/netfilter/ip6_tables.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 82b969b3567a0e..ae12d7b0ba0964 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -573,7 +573,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1232,7 +1233,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ae55f23a851ce7..0a81558c932ef4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -738,7 +738,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1492,7 +1493,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 88221aba46391b..69a4ebec2012c5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -750,7 +750,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1504,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } From 5ebdccd7685f1c0b451c516f99082642d8d49003 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:52 +0100 Subject: [PATCH 071/433] netfilter: x_tables: fix unconditional helper commit 54d83fc74aa9ec72794373cb47432c5f7fb1a309 upstream. Ben Hawkes says: In the mark_source_chains function (net/ipv4/netfilter/ip_tables.c) it is possible for a user-supplied ipt_entry structure to have a large next_offset field. This field is not bounds checked prior to writing a counter value at the supplied offset. Problem is that mark_source_chains should not have been called -- the rule doesn't have a next entry, so its supposed to return an absolute verdict of either ACCEPT or DROP. However, the function conditional() doesn't work as the name implies. It only checks that the rule is using wildcard address matching. However, an unconditional rule must also not be using any matches (no -m args). The underflow validator only checked the addresses, therefore passing the 'unconditional absolute verdict' test, while mark_source_chains also tested for presence of matches, and thus proceeeded to the next (not-existent) rule. Unify this so that all the callers have same idea of 'unconditional rule'. Reported-by: Ben Hawkes Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 18 +++++++++--------- net/ipv4/netfilter/ip_tables.c | 23 +++++++++++------------ net/ipv6/netfilter/ip6_tables.c | 23 +++++++++++------------ 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ae12d7b0ba0964..36a30fab8625c6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -359,11 +359,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if @@ -402,11 +403,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -551,7 +551,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -598,9 +598,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0a81558c932ef4..99d46b0a4eade0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -476,11 +476,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -715,7 +714,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -763,9 +762,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 69a4ebec2012c5..6198807e06f4b3 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -727,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; From dbea3ce55ad13ddccf519a95ce0d77a5064e9ccc Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Fri, 29 Apr 2016 10:43:40 -0700 Subject: [PATCH 072/433] crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq commit 6dc5df71ee5c8b44607928bfe27be50314dcf848 upstream. Fix undefined reference issue reported by kbuild test robot. Reported-by: kbuild test robot Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index b9178d0a3093fb..aa1dbeaa9b49d6 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -145,8 +145,6 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); -int adf_init_pf_wq(void); -void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); @@ -229,6 +227,8 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, uint32_t vf_mask); void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, uint32_t vf_mask); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); #else static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) { @@ -238,5 +238,14 @@ static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev) { } + +static inline int adf_init_pf_wq(void) +{ + return 0; +} + +static inline void adf_exit_pf_wq(void) +{ +} #endif #endif From 0d6ad54b74fd66ca016076900da97e96272ff83e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 4 May 2016 14:38:26 +0200 Subject: [PATCH 073/433] drm/core: Do not preserve framebuffer on rmfb, v4. commit f2d580b9a8149735cbc4b59c4a8df60173658140 upstream. It turns out that preserving framebuffers after the rmfb call breaks vmwgfx userspace. This was originally introduced because it was thought nobody relied on the behavior, but unfortunately it seems there are exceptions. drm_framebuffer_remove may fail with -EINTR now, so a straight revert is impossible. There is no way to remove the framebuffer from the lists and active planes without introducing a race because of the different locking requirements. Instead call drm_framebuffer_remove from a workqueue, which is unaffected by signals. Changes since v1: - Add comment. Changes since v2: - Add fastpath for refcount = 1. (danvet) Changes since v3: - Rebased. - Restore lastclose framebuffer removal too. Fixes: 13803132818c ("drm/core: Preserve the framebuffer after removing it.") Testcase: kms_rmfb_basic References: https://lists.freedesktop.org/archives/dri-devel/2016-March/102876.html Cc: Thomas Hellstrom Cc: David Herrmann Reviewed-by: Daniel Vetter Tested-by: Thomas Hellstrom #v3 Tested-by: Tvrtko Ursulin Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/6c63ca37-0e7e-ac7f-a6d2-c7822e3d611f@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_crtc.c | 60 ++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 24c5434abd1c44..a02238c85e18d2 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3316,6 +3316,24 @@ int drm_mode_addfb2(struct drm_device *dev, return 0; } +struct drm_mode_rmfb_work { + struct work_struct work; + struct list_head fbs; +}; + +static void drm_mode_rmfb_work_fn(struct work_struct *w) +{ + struct drm_mode_rmfb_work *arg = container_of(w, typeof(*arg), work); + + while (!list_empty(&arg->fbs)) { + struct drm_framebuffer *fb = + list_first_entry(&arg->fbs, typeof(*fb), filp_head); + + list_del_init(&fb->filp_head); + drm_framebuffer_remove(fb); + } +} + /** * drm_mode_rmfb - remove an FB from the configuration * @dev: drm device for the ioctl @@ -3356,7 +3374,25 @@ int drm_mode_rmfb(struct drm_device *dev, mutex_unlock(&dev->mode_config.fb_lock); mutex_unlock(&file_priv->fbs_lock); - drm_framebuffer_unreference(fb); + /* + * we now own the reference that was stored in the fbs list + * + * drm_framebuffer_remove may fail with -EINTR on pending signals, + * so run this in a separate stack as there's no way to correctly + * handle this after the fb is already removed from the lookup table. + */ + if (atomic_read(&fb->refcount.refcount) > 1) { + struct drm_mode_rmfb_work arg; + + INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn); + INIT_LIST_HEAD(&arg.fbs); + list_add_tail(&fb->filp_head, &arg.fbs); + + schedule_work(&arg.work); + flush_work(&arg.work); + destroy_work_on_stack(&arg.work); + } else + drm_framebuffer_unreference(fb); return 0; @@ -3509,7 +3545,6 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev, return ret; } - /** * drm_fb_release - remove and free the FBs on this file * @priv: drm file for the ioctl @@ -3524,6 +3559,9 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev, void drm_fb_release(struct drm_file *priv) { struct drm_framebuffer *fb, *tfb; + struct drm_mode_rmfb_work arg; + + INIT_LIST_HEAD(&arg.fbs); /* * When the file gets released that means no one else can access the fb @@ -3536,10 +3574,22 @@ void drm_fb_release(struct drm_file *priv) * at it any more. */ list_for_each_entry_safe(fb, tfb, &priv->fbs, filp_head) { - list_del_init(&fb->filp_head); + if (atomic_read(&fb->refcount.refcount) > 1) { + list_move_tail(&fb->filp_head, &arg.fbs); + } else { + list_del_init(&fb->filp_head); - /* This drops the fpriv->fbs reference. */ - drm_framebuffer_unreference(fb); + /* This drops the fpriv->fbs reference. */ + drm_framebuffer_unreference(fb); + } + } + + if (!list_empty(&arg.fbs)) { + INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn); + + schedule_work(&arg.work); + flush_work(&arg.work); + destroy_work_on_stack(&arg.work); } } From d6f7cd1b21b9e797e09269ee16655f9c0e4a3fa1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:21 +0200 Subject: [PATCH 074/433] netfilter: x_tables: don't move to non-existent next rule commit f24e230d257af1ad7476c6e81a8dc3127a74204e upstream. Ben Hawkes says: In the mark_source_chains function (net/ipv4/netfilter/ip_tables.c) it is possible for a user-supplied ipt_entry structure to have a large next_offset field. This field is not bounds checked prior to writing a counter value at the supplied offset. Base chains enforce absolute verdict. User defined chains are supposed to end with an unconditional return, xtables userspace adds them automatically. But if such return is missing we will move to non-existent next rule. Reported-by: Ben Hawkes Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 8 +++++--- net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 36a30fab8625c6..e50b707c39c044 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -439,6 +439,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -461,6 +463,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -691,10 +695,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, } } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99d46b0a4eade0..2035c1f4cc260d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -520,6 +520,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -541,6 +543,8 @@ mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6198807e06f4b3..2f03d2141ae783 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -532,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -553,6 +555,8 @@ mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); From 611d408a531fdbecf07e268ac87e37d71dd5cd8e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:22 +0200 Subject: [PATCH 075/433] netfilter: x_tables: validate targets of jumps commit 36472341017529e2b12573093cc0f68719300997 upstream. When we see a jump also check that the offset gets us to beginning of a rule (an ipt_entry). The extra overhead is negible, even with absurd cases. 300k custom rules, 300k jumps to 'next' user chain: [ plus one jump from INPUT to first userchain ]: Before: real 0m24.874s user 0m7.532s sys 0m16.076s After: real 0m27.464s user 0m7.436s sys 0m18.840s Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 16 ++++++++++++++++ net/ipv4/netfilter/ip_tables.c | 16 ++++++++++++++++ net/ipv6/netfilter/ip6_tables.c | 16 ++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e50b707c39c044..7f3cf5f9d2ecae 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -367,6 +367,18 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } +static bool find_jump_target(const struct xt_table_info *t, + const struct arpt_entry *target) +{ + struct arpt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ @@ -460,6 +472,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct arpt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2035c1f4cc260d..fa7c6d01c2e673 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ipt_entry *target) +{ + struct ipt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -540,6 +552,10 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ipt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2f03d2141ae783..ea30f5fdea5e4a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ip6t_entry *target) +{ + struct ip6t_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -552,6 +564,10 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ip6t_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; From cfdca13028ff3aa8c5f4b63ba5abf878cd55ced5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:23 +0200 Subject: [PATCH 076/433] netfilter: x_tables: add and use xt_check_entry_offsets commit 7d35812c3214afa5b37a675113555259cfd67b98 upstream. Currently arp/ip and ip6tables each implement a short helper to check that the target offset is large enough to hold one xt_entry_target struct and that t->u.target_size fits within the current rule. Unfortunately these checks are not sufficient. To avoid adding new tests to all of ip/ip6/arptables move the current checks into a helper, then extend this helper in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/x_tables.h | 4 ++++ net/ipv4/netfilter/arp_tables.c | 11 +--------- net/ipv4/netfilter/ip_tables.c | 12 +---------- net/ipv6/netfilter/ip6_tables.c | 12 +---------- net/netfilter/x_tables.c | 34 ++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 32 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c5577410c25d3b..1d2657fd29d92e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -239,6 +239,10 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7f3cf5f9d2ecae..253a944ab4e5cf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -496,19 +496,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, static inline int check_entry(const struct arpt_entry *e) { - const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static inline int check_target(struct arpt_entry *e, const char *name) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fa7c6d01c2e673..39543d96f617a5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -590,20 +590,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ipt_entry *e) { - const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ea30f5fdea5e4a..f5d928d46498da 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -602,20 +602,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ip6t_entry *e) { - const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad747ea99b..24a9dfb51df23e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -540,6 +540,40 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, EXPORT_SYMBOL_GPL(xt_compat_match_to_user); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * Return: 0 on success, negative errno on failure. + */ +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { From 6bc803b795631cb14a6a3ea6d433589c8c666cc3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:24 +0200 Subject: [PATCH 077/433] netfilter: x_tables: kill check_entry helper commit aa412ba225dd3bc36d404c28cdc3d674850d80d0 upstream. Once we add more sanity testing to xt_check_entry_offsets it becomes relvant if we're expecting a 32bit 'config_compat' blob or a normal one. Since we already have a lot of similar-named functions (check_entry, compat_check_entry, find_and_check_entry, etc.) and the current incarnation is short just fold its contents into the callers. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 19 ++++++++----------- net/ipv4/netfilter/ip_tables.c | 20 ++++++++------------ net/ipv6/netfilter/ip6_tables.c | 20 ++++++++------------ 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 253a944ab4e5cf..ae7344f245c888 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -494,14 +494,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static inline int check_entry(const struct arpt_entry *e) -{ - if (!arp_checkentry(&e->arp)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); @@ -597,7 +589,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1255,8 +1250,10 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 39543d96f617a5..ddff8f7e3bcec2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -587,15 +587,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ipt_entry *e) -{ - if (!ip_checkentry(&e->ip)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { @@ -760,7 +751,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1515,8 +1509,10 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f5d928d46498da..d6e3c019cf0a66 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -599,15 +599,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e) -{ - if (!ip6_checkentry(&e->ipv6)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; @@ -772,7 +763,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1527,8 +1521,10 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; From ed30e07de077354993122c5d88e535cbe0a03754 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:25 +0200 Subject: [PATCH 078/433] netfilter: x_tables: assert minimum target size commit a08e4e190b866579896c09af59b3bdca821da2cd upstream. The target size includes the size of the xt_entry_target struct. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 24a9dfb51df23e..e3e2dd4c55bf52 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -567,6 +567,9 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + if (target_offset + t->u.target_size > next_offset) return -EINVAL; From 2985d199e713c05eec2eaffeeeac40682aa2e5cc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:26 +0200 Subject: [PATCH 079/433] netfilter: x_tables: add compat version of xt_check_entry_offsets commit fc1221b3a163d1386d1052184202d5dc50d302d1 upstream. 32bit rulesets have different layout and alignment requirements, so once more integrity checks get added to xt_check_entry_offsets it will reject well-formed 32bit rulesets. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/x_tables.h | 3 +++ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/netfilter/x_tables.c | 22 ++++++++++++++++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1d2657fd29d92e..e3a3d059474221 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -492,6 +492,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ae7344f245c888..b05405fd9f9413 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1253,7 +1253,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, e->target_offset, + e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ddff8f7e3bcec2..b83a3369b8a4de 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1512,7 +1512,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d6e3c019cf0a66..d8035191ea1a9b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1524,7 +1524,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e3e2dd4c55bf52..cfdb9591cf2169 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -538,6 +538,27 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ /** @@ -548,6 +569,7 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location From 2066499780e1455c43833b5b34858124047331ff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:27 +0200 Subject: [PATCH 080/433] netfilter: x_tables: check standard target size too commit 7ed2abddd20cf8f6bd27f65bd218f26fa5bf7f44 upstream. We have targets and standard targets -- the latter carries a verdict. The ip/ip6tables validation functions will access t->verdict for the standard targets to fetch the jump offset or verdict for chainloop detection, but this happens before the targets get checked/validated. Thus we also need to check for verdict presence here, else t->verdict can point right after a blob. Spotted with UBSAN while testing malformed blobs. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index cfdb9591cf2169..1f8cfe7379f701 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -539,6 +539,13 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +/* see xt_check_entry_offsets */ int xt_compat_check_entry_offsets(const void *base, unsigned int target_offset, unsigned int next_offset) @@ -556,6 +563,10 @@ int xt_compat_check_entry_offsets(const void *base, if (target_offset + t->u.target_size > next_offset) return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + return -EINVAL; + return 0; } EXPORT_SYMBOL(xt_compat_check_entry_offsets); @@ -595,6 +606,10 @@ int xt_check_entry_offsets(const void *base, if (target_offset + t->u.target_size > next_offset) return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct xt_standard_target) != next_offset) + return -EINVAL; + return 0; } EXPORT_SYMBOL(xt_check_entry_offsets); From 8a865621540c7bc7f03665a2b7029cb444a9593a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:28 +0200 Subject: [PATCH 081/433] netfilter: x_tables: check for bogus target offset commit ce683e5f9d045e5d67d1312a42b359cb2ab2a13c upstream. We're currently asserting that targetoff + targetsize <= nextoff. Extend it to also check that targetoff is >= sizeof(xt_entry). Since this is generic code, add an argument pointing to the start of the match/target, we can then derive the base structure size from the delta. We also need the e->elems pointer in a followup change to validate matches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 5 +++-- net/ipv4/netfilter/ip_tables.c | 5 +++-- net/ipv6/netfilter/ip6_tables.c | 5 +++-- net/netfilter/x_tables.c | 17 +++++++++++++++-- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e3a3d059474221..41b38534293cec 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -239,7 +239,7 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); -int xt_check_entry_offsets(const void *base, +int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); @@ -492,7 +492,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b05405fd9f9413..fa907f24616e69 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -592,7 +592,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1253,7 +1254,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, e->target_offset, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b83a3369b8a4de..fa06250faf7fb2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -754,7 +754,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1512,7 +1513,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d8035191ea1a9b..84eb76fedf8ca2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -766,7 +766,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1524,7 +1525,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 1f8cfe7379f701..7b46dd6a20a598 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -545,14 +545,17 @@ struct compat_xt_standard_target { compat_uint_t verdict; }; -/* see xt_check_entry_offsets */ -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct compat_xt_entry_target *t; const char *e = base; + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; @@ -576,12 +579,16 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * xt_check_entry_offsets - validate arp/ip/ip6t_entry * * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct. + * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated @@ -590,12 +597,18 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, + const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct xt_entry_target *t; const char *e = base; + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; From caa39a1e709c3cf89c0a4d600e98f9863085e3d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:29 +0200 Subject: [PATCH 082/433] netfilter: x_tables: validate all offsets and sizes in a rule commit 13631bfc604161a9d69cd68991dff8603edd66f9 upstream. Validate that all matches (if any) add up to the beginning of the target and that each match covers at least the base structure size. The compat path should be able to safely re-use the function as the structures only differ in alignment; added a BUILD_BUG_ON just in case we have an arch that adds padding as well. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 81 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 7b46dd6a20a598..3d2cdcd5083289 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -415,6 +415,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -570,7 +611,14 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, target_offset + sizeof(struct compat_xt_standard_target) != next_offset) return -EINVAL; - return 0; + /* compat_xt_entry match has less strict aligment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); } EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ @@ -583,17 +631,39 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * - * validates that target_offset and next_offset are sane. - * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. + * validates that target_offset and next_offset are sane and that all + * match sizes (if any) align with the target offset. * * This function does not validate the targets or matches themselves, it - * only tests that all the offsets and sizes are correct. + * only tests that all the offsets and sizes are correct, that all + * match structures are aligned, and that the last structure ends where + * the target structure begins. + * + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated * length. * + * A well-formed entry looks like this: + * + * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry + * e->elems[]-----' | | + * matchsize | | + * matchsize | | + * | | + * target_offset---------------------------------' | + * next_offset---------------------------------------------------' + * + * elems[]: flexible array member at end of ip(6)/arpt_entry struct. + * This is where matches (if any) and the target reside. + * target_offset: beginning of target. + * next_offset: start of the next rule; also: size of this rule. + * Since targets have a minimum size, target_offset + minlen <= next_offset. + * + * Every match stores its size, sum of sizes must not exceed target_offset. + * * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, @@ -623,7 +693,8 @@ int xt_check_entry_offsets(const void *base, target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; - return 0; + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); } EXPORT_SYMBOL(xt_check_entry_offsets); From fe1e4026ce9f03653288c743218ed70ee0a2c4e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: [PATCH 083/433] netfilter: x_tables: don't reject valid target size on some architectures commit 7b7eba0f3515fca3296b8881d583f7c1042f5226 upstream. Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 3d2cdcd5083289..4cac7b65c742c5 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -608,7 +608,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -690,7 +690,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, From 946e8148dba1943e1762833694ec2c476469d1fe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:32 +0200 Subject: [PATCH 084/433] netfilter: arp_tables: simplify translate_compat_table args commit 8dddd32756f6fe8e4e82a63361119b7e2384e02f upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 82 +++++++++++++++------------------ 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa907f24616e69..fc527638308546 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1213,6 +1213,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1228,8 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_target *t; struct xt_target *target; @@ -1300,7 +1311,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, static int compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1333,14 +1344,9 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1352,8 +1358,8 @@ static int translate_compat_table(const char *name, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -1364,40 +1370,39 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1407,17 +1412,17 @@ static int translate_compat_table(const char *name, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1427,7 +1432,7 @@ static int translate_compat_table(const char *name, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; @@ -1438,7 +1443,7 @@ static int translate_compat_table(const char *name, break; } - ret = check_target(iter1, name); + ret = check_target(iter1, compatr->name); if (ret != 0) { xt_percpu_counter_free(iter1->counters.pcnt); break; @@ -1480,7 +1485,7 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1492,18 +1497,6 @@ static int translate_compat_table(const char *name, goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1536,10 +1529,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From 77521be687870e2ecae748df7b8fb103dd57dac2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:30 +0200 Subject: [PATCH 085/433] netfilter: ip_tables: simplify translate_compat_table args commit 7d3f843eed29222254c9feab481f55175a1afcc9 upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/ip_tables.c | 59 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fa06250faf7fb2..242bf9a6799ccc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1448,7 +1448,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, int *size) { @@ -1485,8 +1484,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_match *ematch; struct xt_entry_target *t; @@ -1522,7 +1520,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ip, &off); + ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; @@ -1571,7 +1569,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, static int compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1654,14 +1652,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1673,8 +1666,8 @@ translate_compat_table(struct net *net, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -1685,40 +1678,39 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1728,17 +1720,17 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1748,12 +1740,12 @@ translate_compat_table(struct net *net, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); + ret = compat_check_entry(iter1, net, compatr->name); if (ret != 0) break; ++i; @@ -1793,7 +1785,7 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1838,10 +1830,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From 0fab6d3d18866bbc4f964696ae31a5456795bb10 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:31 +0200 Subject: [PATCH 086/433] netfilter: ip6_tables: simplify translate_compat_table args commit 329a0807124f12fe1c8032f95d8a8eb47047fb0e upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/ip6_tables.c | 59 ++++++++++++++------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 84eb76fedf8ca2..72cec10884dc11 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1460,7 +1460,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, int *size) { @@ -1497,8 +1496,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_match *ematch; struct xt_entry_target *t; @@ -1534,7 +1532,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1583,7 +1581,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, static int compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1663,14 +1661,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1682,8 +1675,8 @@ translate_compat_table(struct net *net, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -1694,40 +1687,39 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1737,17 +1729,17 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1757,12 +1749,12 @@ translate_compat_table(struct net *net, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); + ret = compat_check_entry(iter1, net, compatr->name); if (ret != 0) break; ++i; @@ -1802,7 +1794,7 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1847,10 +1839,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From 3a69c0f0487a6eba5fd5005c7902a230c8e31518 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:33 +0200 Subject: [PATCH 087/433] netfilter: x_tables: xt_compat_match_from_user doesn't need a retval commit 0188346f21e6546498c2a0f84888797ad4063fc5 upstream. Always returned 0. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/arp_tables.c | 17 +++++------------ net/ipv4/netfilter/ip_tables.c | 26 +++++++++----------------- net/ipv6/netfilter/ip6_tables.c | 27 +++++++++------------------ net/netfilter/x_tables.c | 5 ++--- 5 files changed, 26 insertions(+), 51 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 41b38534293cec..67de5ac4029b70 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -482,7 +482,7 @@ void xt_compat_init_offsets(u_int8_t af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fc527638308546..858cce27dd30b9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1309,7 +1309,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1318,9 +1318,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1341,7 +1340,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int translate_compat_table(struct xt_table_info **pinfo, @@ -1420,16 +1418,11 @@ static int translate_compat_table(struct xt_table_info **pinfo, entry1 = newinfo->entries; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 242bf9a6799ccc..143c611239f0b9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1567,7 +1567,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1576,10 +1576,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1588,11 +1587,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1605,7 +1602,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int @@ -1728,16 +1724,12 @@ translate_compat_table(struct net *net, entry1 = newinfo->entries; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 72cec10884dc11..c1629770d0c1bf 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1579,7 +1579,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1587,10 +1587,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1599,11 +1598,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1615,7 +1612,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int compat_check_entry(struct ip6t_entry *e, struct net *net, @@ -1736,17 +1732,12 @@ translate_compat_table(struct net *net, } entry1 = newinfo->entries; pos = entry1; - size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 4cac7b65c742c5..3b9034c5513ed3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -525,8 +525,8 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; @@ -548,7 +548,6 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); From d69f93d059c6294322bb91f9aaff796a21c3aa20 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:34 +0200 Subject: [PATCH 088/433] netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 114 +++++------------------ net/ipv4/netfilter/ip_tables.c | 155 ++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 148 +++++------------------------- net/netfilter/x_tables.c | 8 ++ 4 files changed, 83 insertions(+), 342 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 858cce27dd30b9..5158846b55ef00 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1233,19 +1233,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || @@ -1290,17 +1288,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1350,7 +1337,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; @@ -1359,12 +1346,6 @@ static int translate_compat_table(struct xt_table_info **pinfo, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); @@ -1373,9 +1354,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1388,23 +1367,6 @@ static int translate_compat_table(struct xt_table_info **pinfo, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1421,55 +1383,26 @@ static int translate_compat_table(struct xt_table_info **pinfo, xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) - goto free_newinfo; - - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - iter1->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(iter1->counters.pcnt)) { - ret = -ENOMEM; - break; - } + memcpy(&repl, compatr, sizeof(*compatr)); - ret = check_target(iter1, compatr->name); - if (ret != 0) { - xt_percpu_counter_free(iter1->counters.pcnt); - break; - } - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1477,17 +1410,16 @@ static int translate_compat_table(struct xt_table_info **pinfo, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } static int compat_do_replace(struct net *net, void __user *user, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 143c611239f0b9..c2f7f58c001076 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1482,16 +1482,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || @@ -1543,17 +1541,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1596,6 +1583,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; @@ -1604,48 +1592,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } } -static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; -} - static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, @@ -1656,7 +1602,7 @@ translate_compat_table(struct net *net, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; @@ -1665,12 +1611,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); @@ -1679,9 +1619,7 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1694,23 +1632,6 @@ translate_compat_table(struct net *net, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1718,8 +1639,8 @@ translate_compat_table(struct net *net, newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; @@ -1728,47 +1649,30 @@ translate_compat_table(struct net *net, compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, compatr->name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1776,17 +1680,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c1629770d0c1bf..ead2c78087e996 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1494,16 +1494,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || @@ -1555,17 +1553,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1614,47 +1601,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, } } -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; -} - static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, @@ -1665,7 +1611,7 @@ translate_compat_table(struct net *net, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; @@ -1674,12 +1620,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); @@ -1688,9 +1628,7 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1703,23 +1641,6 @@ translate_compat_table(struct net *net, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1727,56 +1648,34 @@ translate_compat_table(struct net *net, newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; + size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, compatr->name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1784,17 +1683,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 3b9034c5513ed3..08c9e312bd78a4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -532,6 +532,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -545,6 +546,9 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; @@ -762,6 +766,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -775,6 +780,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; From e917563612e5d8ad3a80efa5f43e654be50fe82f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 15:37:59 +0200 Subject: [PATCH 089/433] netfilter: x_tables: introduce and use xt_copy_counters_from_user commit d7591f0c41ce3e67600a982bab6989ef0f07b3ce upstream. The three variants use same copy&pasted code, condense this into a helper and use that. Make sure info.name is 0-terminated. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/x_tables.h | 3 ++ net/ipv4/netfilter/arp_tables.c | 48 ++----------------- net/ipv4/netfilter/ip_tables.c | 48 ++----------------- net/ipv6/netfilter/ip6_tables.c | 49 ++------------------ net/netfilter/x_tables.c | 74 ++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 130 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 67de5ac4029b70..04078e8a4803aa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -248,6 +248,9 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat); + struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5158846b55ef00..6e3e0e8b1ce325 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1130,55 +1130,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1186,7 +1148,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c2f7f58c001076..a399c5419622d5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1313,55 +1313,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1369,7 +1331,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ead2c78087e996..22f39e00bef3d2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1325,55 +1325,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1381,7 +1342,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 08c9e312bd78a4..25391fb2551622 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -751,6 +751,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. + */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { From 9ca1d50fa959cda1f04f43275f7930a70f1a631e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Jun 2016 10:18:38 -0700 Subject: [PATCH 090/433] Linux 4.4.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4b33cdf991adc..fadbb9d73c6d40 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Blurry Fish Butt From b40d9bcad6420669ccf41f69278506125fa68428 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:46:24 -0400 Subject: [PATCH 091/433] UPSTREAM: net: fix infoleak in rtnetlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry pick from commit 5f8e44741f9f216e33736ea4ec65ca9ac03036e6) The stack object “map” has a total size of 32 bytes. Its last 4 bytes are padding generated by compiler. These padding bytes are not initialized and sent out via “nla_put”. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Bug: 28620102 Change-Id: Ica015c6a90d47e9188b1cd87a280ac6819dd9d09 --- net/core/rtnetlink.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a08876de7..9c6d15756e7a84 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1174,14 +1174,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; From 14dfd8269605b9dc3dcdd10945398d3822f07953 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:07 -0400 Subject: [PATCH 092/433] UPSTREAM: ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry pick from commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e) The stack object “tread” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Bug: 28980557 Change-Id: Ibda2d126f6d72fedf797a98796c3cde7bb03db76 --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 8b06413e967e38..8b6c3605d2e986 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1680,6 +1680,7 @@ static int snd_timer_user_params(struct file *file, if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread tread; + memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp.tv_sec = 0; tread.tstamp.tv_nsec = 0; From 29d02a76f5ff4af4eeb5f1ef3b09cd5b48fc2662 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:32 -0400 Subject: [PATCH 093/433] UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry pick from commit e4ec8cc8039a7063e24204299b462bd1383184a5) The stack object “r1” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Bug: 28980217 Change-Id: If2bba3c9ffb4e57190583b0bb2524d3b2514b2a3 --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 8b6c3605d2e986..ded95e6f201fda 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1215,6 +1215,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) && tu->last_resolution != resolution) { + memset(&r1, 0, sizeof(r1)); r1.event = SNDRV_TIMER_EVENT_RESOLUTION; r1.tstamp = tstamp; r1.val = resolution; From 3d1e8cf7ef81f13d4da5218174d12a40e8bf1116 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:20 -0400 Subject: [PATCH 094/433] UPSTREAM: ALSA: timer: Fix leak in events via snd_timer_user_ccallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry pick from commit 9a47e9cff994f37f7f0dbd9ae23740d0f64f9fe6) The stack object “r1” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Bug: 28980217 Change-Id: I2e4c27352894b9f1f4c808b8db3ae5f9284faec1 --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index ded95e6f201fda..1701703b8d936a 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1181,6 +1181,7 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, tu->tstamp = *tstamp; if ((tu->filter & (1 << event)) == 0 || !tu->tread) return; + memset(&r1, 0, sizeof(r1)); r1.event = event; r1.tstamp = *tstamp; r1.val = resolution; From 659740adf180779d0177a086656880bc7a43cfd1 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:32:16 -0400 Subject: [PATCH 095/433] UPSTREAM: USB: usbfs: fix potential infoleak in devio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry pick from commit 681fef8380eb818c0b845fca5d2ab1dcbab114ee) The stack object “ci” has a total size of 8 bytes. Its last 3 bytes are padding bytes which are not initialized and leaked to userland via “copy_to_user”. Signed-off-by: Kangjie Lu Signed-off-by: Greg Kroah-Hartman Bug: 28619695 Change-Id: I170754d659d0891c075f85211b5e3970b114f097 --- drivers/usb/core/devio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 38ae877c46e312..3ffb01ff654978 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1203,10 +1203,11 @@ static int proc_getdriver(struct usb_dev_state *ps, void __user *arg) static int proc_connectinfo(struct usb_dev_state *ps, void __user *arg) { - struct usbdevfs_connectinfo ci = { - .devnum = ps->dev->devnum, - .slow = ps->dev->speed == USB_SPEED_LOW - }; + struct usbdevfs_connectinfo ci; + + memset(&ci, 0, sizeof(ci)); + ci.devnum = ps->dev->devnum; + ci.slow = ps->dev->speed == USB_SPEED_LOW; if (copy_to_user(arg, &ci, sizeof(ci))) return -EFAULT; From 7dbe59704ab0dd0a07ae2c2b7b590bb2b8ccd6d9 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 23 Jun 2016 15:35:07 +0530 Subject: [PATCH 096/433] ANDROID: base-cfg: enable UID_CPUTIME Enabled UID_CPUTIME and dependent PROFILING config option. UID_CPUTIME (/proc/uid_cputime) interfaces provide amount of time a UID's processes spent executing in user-space and kernel-space. It is used by batterystats service. Signed-off-by: Amit Pundir --- android/configs/android-base.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index bdd99f2becfc5b..6496bb3961a251 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -137,6 +137,7 @@ CONFIG_PPP_BSDCOMP=y CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y +CONFIG_PROFILING=y CONFIG_QUOTA=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y @@ -149,6 +150,7 @@ CONFIG_STAGING=y CONFIG_SWP_EMULATION=y CONFIG_SYNC=y CONFIG_TUN=y +CONFIG_UID_CPUTIME=y CONFIG_UNIX=y CONFIG_USB_GADGET=y CONFIG_USB_CONFIGFS=y From ef51a2254d6579560bf7ff8ebbeb9b3848fe10bf Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Thu, 7 Jan 2016 16:46:14 +0100 Subject: [PATCH 097/433] BACKPORT: PM / sleep: Go direct_complete if driver has no callbacks Backport notes: This resolves clk warnings in the designware i2c driver on HiKey seen during suspend/resume. Cherrypicked from: aa8e54b559479d0cb7eb632ba443b8cacd20cd4b If a suitable prepare callback cannot be found for a given device and its driver has no PM callbacks at all, assume that it can go direct to complete when the system goes to sleep. The reason for this is that there's lots of devices in a system that do no PM at all and there's no reason for them to prevent their ancestors to do direct_complete if they can support it. Change-Id: Ia773afb4b266f012336b99fc8cf87453839e078b Signed-off-by: Tomeu Vizoso Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki [jstultz: Backported to 4.4] Signed-off-by: John Stultz --- drivers/base/dd.c | 3 +++ drivers/base/power/main.c | 35 +++++++++++++++++++++++++++++++++++ drivers/base/power/power.h | 3 +++ include/linux/pm.h | 1 + 4 files changed, 42 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3ccad691..9e425fbf83cbe0 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -205,6 +205,8 @@ static void driver_bound(struct device *dev) klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices); + device_pm_check_callbacks(dev); + /* * Make sure the device is no longer in one of the deferred lists and * kick off retrying all pending devices @@ -697,6 +699,7 @@ static void __device_release_driver(struct device *dev) dev->pm_domain->dismiss(dev); klist_remove(&dev->p->knode_driver); + device_pm_check_callbacks(dev); if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_UNBOUND_DRIVER, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 5ea529165362fb..77379f3d136a42 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -126,6 +126,7 @@ void device_pm_add(struct device *dev) { pr_debug("PM: Adding info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); + device_pm_check_callbacks(dev); mutex_lock(&dpm_list_mtx); if (dev->parent && dev->parent->power.is_prepared) dev_warn(dev, "parent %s should not be sleeping\n", @@ -148,6 +149,7 @@ void device_pm_remove(struct device *dev) mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); pm_runtime_remove(dev); + device_pm_check_callbacks(dev); } /** @@ -1574,6 +1576,11 @@ static int device_prepare(struct device *dev, pm_message_t state) dev->power.wakeup_path = device_may_wakeup(dev); + if (dev->power.no_pm_callbacks) { + ret = 1; /* Let device go direct_complete */ + goto unlock; + } + if (dev->pm_domain) { info = "preparing power domain "; callback = dev->pm_domain->ops.prepare; @@ -1596,6 +1603,7 @@ static int device_prepare(struct device *dev, pm_message_t state) if (callback) ret = callback(dev); +unlock: device_unlock(dev); if (ret < 0) { @@ -1724,3 +1732,30 @@ void dpm_for_each_dev(void *data, void (*fn)(struct device *, void *)) device_pm_unlock(); } EXPORT_SYMBOL_GPL(dpm_for_each_dev); + +static bool pm_ops_is_empty(const struct dev_pm_ops *ops) +{ + if (!ops) + return true; + + return !ops->prepare && + !ops->suspend && + !ops->suspend_late && + !ops->suspend_noirq && + !ops->resume_noirq && + !ops->resume_early && + !ops->resume && + !ops->complete; +} + +void device_pm_check_callbacks(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + dev->power.no_pm_callbacks = + (!dev->bus || pm_ops_is_empty(dev->bus->pm)) && + (!dev->class || pm_ops_is_empty(dev->class->pm)) && + (!dev->type || pm_ops_is_empty(dev->type->pm)) && + (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && + (!dev->driver || pm_ops_is_empty(dev->driver->pm)); + spin_unlock_irq(&dev->power.lock); +} diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 998fa6b2308443..297beae6431459 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -123,6 +123,7 @@ extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); +extern void device_pm_check_callbacks(struct device *dev); #else /* !CONFIG_PM_SLEEP */ @@ -141,6 +142,8 @@ static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} +static inline void device_pm_check_callbacks(struct device *dev) {} + #endif /* !CONFIG_PM_SLEEP */ static inline void device_pm_init(struct device *dev) diff --git a/include/linux/pm.h b/include/linux/pm.h index 528be6787796b5..6a5d654f444726 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -573,6 +573,7 @@ struct dev_pm_info { struct wakeup_source *wakeup; bool wakeup_path:1; bool syscore:1; + bool no_pm_callbacks:1; /* Owned by the PM core */ #else unsigned int should_wakeup:1; #endif From e639a4e74d6cd2c01d4f2afbbe0b6e6e233b42d2 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 27 Jun 2016 13:33:59 -0700 Subject: [PATCH 098/433] Revert "usb: gadget: prevent change of Host MAC address of 'usb0' interface" This reverts commit 265801537d110eb68d44a2f66015479908f635c0. Signed-off-by: Badhri Jagan Sridharan --- drivers/usb/gadget/function/u_ether.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index dd73dfe5dcabc6..74e9f5b5a45dd6 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -863,8 +863,6 @@ static int eth_stop(struct net_device *net) /*-------------------------------------------------------------------------*/ -static u8 host_ethaddr[ETH_ALEN]; - static int get_ether_addr(const char *str, u8 *dev_addr) { if (str) { @@ -895,17 +893,6 @@ static int get_ether_addr_str(u8 dev_addr[ETH_ALEN], char *str, int len) return 18; } -static int get_host_ether_addr(u8 *str, u8 *dev_addr) -{ - memcpy(dev_addr, str, ETH_ALEN); - if (is_valid_ether_addr(dev_addr)) - return 0; - - random_ether_addr(dev_addr); - memcpy(str, dev_addr, ETH_ALEN); - return 1; -} - static const struct net_device_ops eth_netdev_ops = { .ndo_open = eth_open, .ndo_stop = eth_stop, @@ -963,11 +950,9 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, if (get_ether_addr(dev_addr, net->dev_addr)) dev_warn(&g->dev, "using random %s ethernet address\n", "self"); - - if (get_host_ether_addr(host_ethaddr, dev->host_mac)) - dev_warn(&g->dev, "using random %s ethernet address\n", "host"); - else - dev_warn(&g->dev, "using previous %s ethernet address\n", "host"); + if (get_ether_addr(host_addr, dev->host_mac)) + dev_warn(&g->dev, + "using random %s ethernet address\n", "host"); if (ethaddr) memcpy(ethaddr, dev->host_mac, ETH_ALEN); From 074c908972a215f63bb5409f6a317020fb137d5b Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 22 Jun 2016 16:49:48 +0800 Subject: [PATCH 099/433] netfilter: xt_quota2: make quota2_log work well In upstream commit 7200135bc1e61f1437dc326ae2ef2f310c50b4eb (netfilter: kill ulog targets) http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7200135bc1e6 ipt_ULOG target was removed, meanwhile, the IP_NF_TARGET_ULOG Kconfig and ipt_ULOG.h header file were removed too. This causes we cannot enable QUOTA2_LOG, and netd complains this error: "Unable to open quota socket". So when we reach the quota2 limit, userspace will not be notified with this event. Since IP_NF_TARGET_ULOG was removed, we need not depend on "IP_NF_TARGET_ULOG=n", and for compatibility, add ulog_packet_msg_t related definitions copied from "ipt_ULOG.h". Change-Id: I38132efaabf52bea75dfd736ce734a1b9690e87e Reported-by: Samboo Shen Signed-off-by: Liping Zhang --- net/netfilter/Kconfig | 1 - net/netfilter/xt_quota2.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 273f26b67653e6..1959548b11618d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1354,7 +1354,6 @@ config NETFILTER_XT_MATCH_QUOTA2 config NETFILTER_XT_MATCH_QUOTA2_LOG bool '"quota2" Netfilter LOG support' depends on NETFILTER_XT_MATCH_QUOTA2 - depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no default n help This option allows `quota2' to log ONCE when a quota limit diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 99592ae56d9b0d..834594aa0085fd 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -21,8 +21,27 @@ #include #include + #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG -#include +/* For compatibility, these definitions are copied from the + * deprecated header file */ +#define ULOG_MAC_LEN 80 +#define ULOG_PREFIX_LEN 32 + +/* Format of the ULOG packets passed through netlink */ +typedef struct ulog_packet_msg { + unsigned long mark; + long timestamp_sec; + long timestamp_usec; + unsigned int hook; + char indev_name[IFNAMSIZ]; + char outdev_name[IFNAMSIZ]; + size_t data_len; + char prefix[ULOG_PREFIX_LEN]; + unsigned char mac_len; + unsigned char mac[ULOG_MAC_LEN]; + unsigned char payload[0]; +} ulog_packet_msg_t; #endif /** From d6ef811017853661466366ab543b5a824f3c1e27 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 13 Jun 2016 17:14:02 -0700 Subject: [PATCH 100/433] reset: hi6220: Sync reset driver with Guodong's tree In Guodong's rebase tree, an updated version of the reset driver has been added, along with support to reset some of the clks. This is necessary for the future drm driver resync to work. Change-Id: Ie7b4987fa242ea3dbb0af91e729c11c9fb2503f8 Signed-off-by: John Stultz --- drivers/clk/hisilicon/clk-hi6220.c | 15 +- drivers/reset/hisilicon/hi6220_reset.c | 135 ++++++++++++------ .../dt-bindings/reset/hisi,hi6220-resets.h | 8 ++ 3 files changed, 106 insertions(+), 52 deletions(-) diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c index effe2ed4073389..fe5ec1d0a11ba3 100644 --- a/drivers/clk/hisilicon/clk-hi6220.c +++ b/drivers/clk/hisilicon/clk-hi6220.c @@ -164,11 +164,11 @@ static const char *uart4_src[] __initdata = { "clk_tcxo", "clk_150m", }; static const char *hifi_src[] __initdata = { "syspll", "pll_media_gate", }; static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = { - { HI6220_MMC0_CLK, "mmc0_clk", "mmc0_rst_clk", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 0, 0, }, + { HI6220_MMC0_CLK, "mmc0_clk", "mmc0_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 0, 0, }, { HI6220_MMC0_CIUCLK, "mmc0_ciuclk", "mmc0_smp_in", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 0, 0, }, - { HI6220_MMC1_CLK, "mmc1_clk", "mmc1_rst_clk", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 1, 0, }, + { HI6220_MMC1_CLK, "mmc1_clk", "mmc1_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 1, 0, }, { HI6220_MMC1_CIUCLK, "mmc1_ciuclk", "mmc1_smp_in", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 1, 0, }, - { HI6220_MMC2_CLK, "mmc2_clk", "mmc2_rst_clk", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 2, 0, }, + { HI6220_MMC2_CLK, "mmc2_clk", "mmc2_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 2, 0, }, { HI6220_MMC2_CIUCLK, "mmc2_ciuclk", "mmc2_smp_in", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 2, 0, }, { HI6220_USBOTG_HCLK, "usbotg_hclk", "clk_bus", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 4, 0, }, { HI6220_CLK_PICOPHY, "clk_picophy", "cs_dapb", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x200, 5, 0, }, @@ -199,12 +199,6 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_sys[] __initdata = { { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 12, 0, }, }; -static struct hisi_gate_clock hi6220_reset_clks[] __initdata = { - { 0, "mmc0_rst_clk", "mmc0_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x304, 0, 0, }, - { 0, "mmc1_rst_clk", "mmc1_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x304, 1, 0, }, - { 0, "mmc2_rst_clk", "mmc2_src", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x304, 2, 0, }, -}; - static struct hisi_mux_clock hi6220_mux_clks_sys[] __initdata = { { HI6220_MMC0_SRC, "mmc0_src", mmc0_src_p, ARRAY_SIZE(mmc0_src_p), CLK_SET_RATE_PARENT, 0x4, 0, 1, 0, }, { HI6220_MMC0_SMP_IN, "mmc0_smp_in", mmc0_sample_in, ARRAY_SIZE(mmc0_sample_in), CLK_SET_RATE_PARENT, 0x4, 0, 1, 0, }, @@ -244,9 +238,6 @@ static void __init hi6220_clk_sys_init(struct device_node *np) if (!clk_data) return; - hisi_clk_register_gate(hi6220_reset_clks, - ARRAY_SIZE(hi6220_reset_clks), clk_data); - hisi_clk_register_gate_sep(hi6220_separated_gate_clks_sys, ARRAY_SIZE(hi6220_separated_gate_clks_sys), clk_data); diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c index 7787a9b1cc67cb..ad708bdb2448fb 100644 --- a/drivers/reset/hisilicon/hi6220_reset.c +++ b/drivers/reset/hisilicon/hi6220_reset.c @@ -1,7 +1,7 @@ /* * Hisilicon Hi6220 reset controller driver * - * Copyright (c) 2015 Hisilicon Limited. + * Copyright (c) 2015-2016 Hisilicon Limited. * * Author: Feng Chen * @@ -15,83 +15,138 @@ #include #include #include +#include +#include +#include #include #include #include -#define ASSERT_OFFSET 0x300 -#define DEASSERT_OFFSET 0x304 -#define MAX_INDEX 0x509 +/* + * peripheral ctrl regs + */ +#define PERIPH_ASSERT_OFFSET 0x300 +#define PERIPH_DEASSERT_OFFSET 0x304 +#define PERIPH_MAX_INDEX 0x509 + +/* + * media ctrl regs + */ +#define SC_MEDIA_RSTEN 0x052C +#define SC_MEDIA_RSTDIS 0x0530 +#define MEDIA_MAX_INDEX 8 + +enum hi6220_reset_ctrl_type { + PERIPHERAL, + MEDIA, +}; #define to_reset_data(x) container_of(x, struct hi6220_reset_data, rc_dev) struct hi6220_reset_data { - void __iomem *assert_base; - void __iomem *deassert_base; - struct reset_controller_dev rc_dev; + struct reset_controller_dev rc_dev; + enum hi6220_reset_ctrl_type type; + struct regmap *regmap; }; -static int hi6220_reset_assert(struct reset_controller_dev *rc_dev, +static int hi6220_media_assert(struct reset_controller_dev *rc_dev, unsigned long idx) { struct hi6220_reset_data *data = to_reset_data(rc_dev); + struct regmap *regmap = data->regmap; - int bank = idx >> 8; - int offset = idx & 0xff; + return regmap_write(regmap, SC_MEDIA_RSTEN, BIT(idx)); +} - writel(BIT(offset), data->assert_base + (bank * 0x10)); +static int hi6220_media_deassert(struct reset_controller_dev *rc_dev, + unsigned long idx) +{ + struct hi6220_reset_data *data = to_reset_data(rc_dev); + struct regmap *regmap = data->regmap; - return 0; + return regmap_write(regmap, SC_MEDIA_RSTDIS, BIT(idx)); } -static int hi6220_reset_deassert(struct reset_controller_dev *rc_dev, - unsigned long idx) +static int hi6220_peripheral_assert(struct reset_controller_dev *rc_dev, + unsigned long idx) { struct hi6220_reset_data *data = to_reset_data(rc_dev); + struct regmap *regmap = data->regmap; + u32 bank = idx >> 8; + u32 offset = idx & 0xff; + u32 reg = PERIPH_ASSERT_OFFSET + bank * 0x10; - int bank = idx >> 8; - int offset = idx & 0xff; + return regmap_write(regmap, reg, BIT(offset)); +} - writel(BIT(offset), data->deassert_base + (bank * 0x10)); +static int hi6220_peripheral_deassert(struct reset_controller_dev *rc_dev, + unsigned long idx) +{ + struct hi6220_reset_data *data = to_reset_data(rc_dev); + struct regmap *regmap = data->regmap; + u32 bank = idx >> 8; + u32 offset = idx & 0xff; + u32 reg = PERIPH_DEASSERT_OFFSET + bank * 0x10; - return 0; + return regmap_write(regmap, reg, BIT(offset)); } -static struct reset_control_ops hi6220_reset_ops = { - .assert = hi6220_reset_assert, - .deassert = hi6220_reset_deassert, +static const struct reset_control_ops hi6220_media_reset_ops = { + .assert = hi6220_media_assert, + .deassert = hi6220_media_deassert, +}; + +static const struct reset_control_ops hi6220_peripheral_reset_ops = { + .assert = hi6220_peripheral_assert, + .deassert = hi6220_peripheral_deassert, }; static int hi6220_reset_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + enum hi6220_reset_ctrl_type type; struct hi6220_reset_data *data; - struct resource *res; - void __iomem *src_base; + struct regmap *regmap; - data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - src_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(src_base)) - return PTR_ERR(src_base); - - data->assert_base = src_base + ASSERT_OFFSET; - data->deassert_base = src_base + DEASSERT_OFFSET; - data->rc_dev.nr_resets = MAX_INDEX; - data->rc_dev.ops = &hi6220_reset_ops; - data->rc_dev.of_node = pdev->dev.of_node; - - reset_controller_register(&data->rc_dev); - - return 0; + type = (enum hi6220_reset_ctrl_type)of_device_get_match_data(dev); + + regmap = syscon_node_to_regmap(np); + if (IS_ERR(regmap)) { + dev_err(dev, "failed to get reset controller regmap\n"); + return PTR_ERR(regmap); + } + + data->type = type; + data->regmap = regmap; + data->rc_dev.of_node = np; + if (type == MEDIA) { + data->rc_dev.ops = &hi6220_media_reset_ops; + data->rc_dev.nr_resets = MEDIA_MAX_INDEX; + } else { + data->rc_dev.ops = &hi6220_peripheral_reset_ops; + data->rc_dev.nr_resets = PERIPH_MAX_INDEX; + } + + return reset_controller_register(&data->rc_dev); } static const struct of_device_id hi6220_reset_match[] = { - { .compatible = "hisilicon,hi6220-sysctrl" }, - { }, + { + .compatible = "hisilicon,hi6220-sysctrl", + .data = (void *)PERIPHERAL, + }, + { + .compatible = "hisilicon,hi6220-mediactrl", + .data = (void *)MEDIA, + }, + { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, hi6220_reset_match); static struct platform_driver hi6220_reset_driver = { .probe = hi6220_reset_probe, diff --git a/include/dt-bindings/reset/hisi,hi6220-resets.h b/include/dt-bindings/reset/hisi,hi6220-resets.h index ca08a7e5248e7b..322ec5335b65cf 100644 --- a/include/dt-bindings/reset/hisi,hi6220-resets.h +++ b/include/dt-bindings/reset/hisi,hi6220-resets.h @@ -64,4 +64,12 @@ #define PERIPH_RSDIST9_CARM_SOCDBG 0x507 #define PERIPH_RSDIST9_CARM_ETM 0x508 +#define MEDIA_G3D 0 +#define MEDIA_CODEC_VPU 2 +#define MEDIA_CODEC_JPEG 3 +#define MEDIA_ISP 4 +#define MEDIA_ADE 5 +#define MEDIA_MMU 6 +#define MEDIA_XG2RAM1 7 + #endif /*_DT_BINDINGS_RESET_CONTROLLER_HI6220*/ From de970ebfcb8eb3efb38e77384ac0fbf548f46c99 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 13 Jun 2016 17:15:14 -0700 Subject: [PATCH 101/433] hikey_defconfig: Add reset driver Enable the hi6220 reset driver in the hikey_defconfig. This is needed for the DRM resync to work properly. Change-Id: I538214fac7db9f576623033ffb774601cd021952 Signed-off-by: John Stultz --- arch/arm64/configs/hikey_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/hikey_defconfig b/arch/arm64/configs/hikey_defconfig index 8f975f2e8ef645..2fbdfd44d76246 100644 --- a/arch/arm64/configs/hikey_defconfig +++ b/arch/arm64/configs/hikey_defconfig @@ -439,6 +439,7 @@ CONFIG_MAILBOX=y CONFIG_HI6220_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_RESET_CONTROLLER=y +CONFIG_COMMON_RESET_HI6220=y CONFIG_PHY_HI6220_USB=y CONFIG_PHY_XGENE=y CONFIG_ANDROID=y From 8f87c2c92bff8ef336d1f46aab365906025bec68 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 13 Jun 2016 14:25:34 -0700 Subject: [PATCH 102/433] drm: hisi: Move and rename the hisi drm driver to kirin In preparation to sync with the driver that ended upstream, as well as what is used in Guodong's RPB builds, move the hisi_drm driver over to kirin. The only changes in the source are to include filenames, which changed in the move. Change-Id: Idfa30ffe0e5510c6b82aef60b2c18c77b96b41a6 Signed-off-by: John Stultz --- arch/arm64/configs/hikey_defconfig | 2 ++ drivers/gpu/drm/hisilicon/Kconfig | 5 +++++ drivers/gpu/drm/hisilicon/Makefile | 8 ++++---- drivers/gpu/drm/hisilicon/kirin/Kconfig | 19 +++++++++++++++++++ drivers/gpu/drm/hisilicon/kirin/Makefile | 6 ++++++ .../{hisi_drm_dsi.c => kirin/dw_drm_dsi.c} | 2 +- .../{hisi_dsi_reg.h => kirin/dw_dsi_reg.h} | 0 .../drm/hisilicon/{ => kirin}/hisi_drm_ade.h | 0 .../{hisi_ade_reg.h => kirin/kirin_ade_reg.h} | 0 .../{hisi_drm_ade.c => kirin/kirin_drm_ade.c} | 4 ++-- .../{hisi_drm_drv.c => kirin/kirin_drm_drv.c} | 2 +- .../{hisi_drm_drv.h => kirin/kirin_drm_drv.h} | 0 12 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/hisilicon/kirin/Kconfig create mode 100644 drivers/gpu/drm/hisilicon/kirin/Makefile rename drivers/gpu/drm/hisilicon/{hisi_drm_dsi.c => kirin/dw_drm_dsi.c} (99%) rename drivers/gpu/drm/hisilicon/{hisi_dsi_reg.h => kirin/dw_dsi_reg.h} (100%) rename drivers/gpu/drm/hisilicon/{ => kirin}/hisi_drm_ade.h (100%) rename drivers/gpu/drm/hisilicon/{hisi_ade_reg.h => kirin/kirin_ade_reg.h} (100%) rename drivers/gpu/drm/hisilicon/{hisi_drm_ade.c => kirin/kirin_drm_ade.c} (99%) rename drivers/gpu/drm/hisilicon/{hisi_drm_drv.c => kirin/kirin_drm_drv.c} (99%) rename drivers/gpu/drm/hisilicon/{hisi_drm_drv.h => kirin/kirin_drm_drv.h} (100%) diff --git a/arch/arm64/configs/hikey_defconfig b/arch/arm64/configs/hikey_defconfig index 2fbdfd44d76246..c14ca1c0d3d625 100644 --- a/arch/arm64/configs/hikey_defconfig +++ b/arch/arm64/configs/hikey_defconfig @@ -306,6 +306,8 @@ CONFIG_DRM=y CONFIG_DRM_CMA_FBDEV_BUFFER_NUM=2 CONFIG_DRM_I2C_ADV7511=y CONFIG_DRM_HISI=y +CONFIG_DRM_HISI_KIRIN=y +CONFIG_HISI_KIRIN_DW_DSI=y CONFIG_FB_ARMCLCD=y CONFIG_FB_SIMPLE=y CONFIG_BACKLIGHT_LCD_SUPPORT=y diff --git a/drivers/gpu/drm/hisilicon/Kconfig b/drivers/gpu/drm/hisilicon/Kconfig index f1c33c26b6640c..7503981715027d 100644 --- a/drivers/gpu/drm/hisilicon/Kconfig +++ b/drivers/gpu/drm/hisilicon/Kconfig @@ -8,3 +8,8 @@ config DRM_HISI help Choose this option if you have a hisilicon chipsets(hi6220). If M is selected the module will be called hisi-drm. +# +# hisilicon drm device configuration. +# Please keep this list sorted alphabetically + +source "drivers/gpu/drm/hisilicon/kirin/Kconfig" diff --git a/drivers/gpu/drm/hisilicon/Makefile b/drivers/gpu/drm/hisilicon/Makefile index 5083c1ffd43eda..e3f6d493c99692 100644 --- a/drivers/gpu/drm/hisilicon/Makefile +++ b/drivers/gpu/drm/hisilicon/Makefile @@ -1,5 +1,5 @@ -hisi-drm-y := hisi_drm_drv.o \ - hisi_drm_ade.o \ - hisi_drm_dsi.o +# +# Makefile for hisilicon drm drivers. +# Please keep this list sorted alphabetically -obj-$(CONFIG_DRM_HISI) += hisi-drm.o +obj-$(CONFIG_DRM_HISI_KIRIN) += kirin/ diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig new file mode 100644 index 00000000000000..5dcbc8d8e313d6 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig @@ -0,0 +1,19 @@ +config DRM_HISI_KIRIN + tristate "DRM Support for Hisilicon Kirin series SoCs Platform" + depends on DRM && OF && ARM64 + select DRM_KMS_HELPER + select DRM_GEM_CMA_HELPER + select DRM_KMS_CMA_HELPER + help + Choose this option if you have a hisilicon Kirin chipsets(hi6220). + If M is selected the module will be called kirin-drm. + +config HISI_KIRIN_DW_DSI + tristate "HiSilicon Kirin specific extensions for Synopsys DW MIPI DSI" + depends on DRM_HISI_KIRIN + select DRM_MIPI_DSI + select DRM_PANEL + help + This selects support for HiSilicon Kirin SoC specific extensions for + the Synopsys DesignWare DSI driver. If you want to enable MIPI DSI on + hi6220 based SoC, you should selet this option. diff --git a/drivers/gpu/drm/hisilicon/kirin/Makefile b/drivers/gpu/drm/hisilicon/kirin/Makefile new file mode 100644 index 00000000000000..cdf61589485c79 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/kirin/Makefile @@ -0,0 +1,6 @@ +kirin-drm-y := kirin_drm_drv.o \ + kirin_drm_ade.o + +obj-$(CONFIG_DRM_HISI_KIRIN) += kirin-drm.o + +obj-$(CONFIG_HISI_KIRIN_DW_DSI) += dw_drm_dsi.o diff --git a/drivers/gpu/drm/hisilicon/hisi_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c similarity index 99% rename from drivers/gpu/drm/hisilicon/hisi_drm_dsi.c rename to drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c index 83c5445c260d52..fe94ccbfb5eb19 100644 --- a/drivers/gpu/drm/hisilicon/hisi_drm_dsi.c +++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c @@ -22,7 +22,7 @@ #include #include -#include "hisi_dsi_reg.h" +#include "dw_dsi_reg.h" #define MAX_TX_ESC_CLK (10) #define ROUND(x, y) ((x) / (y) + ((x) % (y) * 10 / (y) >= 5 ? 1 : 0)) diff --git a/drivers/gpu/drm/hisilicon/hisi_dsi_reg.h b/drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h similarity index 100% rename from drivers/gpu/drm/hisilicon/hisi_dsi_reg.h rename to drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h diff --git a/drivers/gpu/drm/hisilicon/hisi_drm_ade.h b/drivers/gpu/drm/hisilicon/kirin/hisi_drm_ade.h similarity index 100% rename from drivers/gpu/drm/hisilicon/hisi_drm_ade.h rename to drivers/gpu/drm/hisilicon/kirin/hisi_drm_ade.h diff --git a/drivers/gpu/drm/hisilicon/hisi_ade_reg.h b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h similarity index 100% rename from drivers/gpu/drm/hisilicon/hisi_ade_reg.h rename to drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h diff --git a/drivers/gpu/drm/hisilicon/hisi_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c similarity index 99% rename from drivers/gpu/drm/hisilicon/hisi_drm_ade.c rename to drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 64365f5ceaf8c4..fbf68eb18a6ad6 100644 --- a/drivers/gpu/drm/hisilicon/hisi_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -27,8 +27,8 @@ #include #include -#include "hisi_drm_drv.h" -#include "hisi_ade_reg.h" +#include "kirin_drm_drv.h" +#include "kirin_ade_reg.h" #define FORCE_PIXEL_CLOCK_SAME_OR_HIGHER 0 #define PRIMARY_CH (ADE_CH1) diff --git a/drivers/gpu/drm/hisilicon/hisi_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c similarity index 99% rename from drivers/gpu/drm/hisilicon/hisi_drm_drv.c rename to drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c index 76eb7118e731b9..dd23a6662e10b6 100644 --- a/drivers/gpu/drm/hisilicon/hisi_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c @@ -23,7 +23,7 @@ #include #include "hisi_drm_ade.h" -#include "hisi_drm_drv.h" +#include "kirin_drm_drv.h" #define DRIVER_NAME "hisi-drm" diff --git a/drivers/gpu/drm/hisilicon/hisi_drm_drv.h b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h similarity index 100% rename from drivers/gpu/drm/hisilicon/hisi_drm_drv.h rename to drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h From f191e4fbb6a854c751234eb440caca5c1437c354 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 13 Jun 2016 15:48:21 -0700 Subject: [PATCH 103/433] drm: kirin: Sync with kirin drm driver This patch syncs the now moved hisi driver with the upstream kirin driver backport from Guodong's rebase tree. This provides binding documentation as well as device-tree changes, which allow for features like HDMI hotplug support as well as DSI panel support. Credit for the hisilicon/kirin driver we're syncing with goes to Xinliang Liu who authored most of the code. Change-Id: I41ddb40c04cb1f6585ddbb7e230ebdeda386905c Signed-off-by: John Stultz --- .../bindings/display/hisilicon/dw-dsi.txt | 72 ++ .../bindings/display/hisilicon/hisi-ade.txt | 68 +- .../bindings/display/hisilicon/hisi-drm.txt | 66 -- .../bindings/display/hisilicon/hisi-dsi.txt | 53 - MAINTAINERS | 10 + .../arm64/boot/dts/hisilicon/hi6220-hikey.dts | 58 +- arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 87 +- arch/arm64/configs/hikey_defconfig | 2 - drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/hisilicon/Kconfig | 10 - drivers/gpu/drm/hisilicon/kirin/Kconfig | 1 + drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c | 929 +++++++++++++----- drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h | 158 ++- .../gpu/drm/hisilicon/kirin/hisi_drm_ade.h | 16 - .../gpu/drm/hisilicon/kirin/kirin_ade_reg.h | 512 +++------- .../gpu/drm/hisilicon/kirin/kirin_drm_ade.c | 803 +++++++-------- .../gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 255 +++-- .../gpu/drm/hisilicon/kirin/kirin_drm_drv.h | 23 +- drivers/gpu/drm/i2c/adv7511.c | 1 - include/drm/drm_mipi_dsi.h | 27 + 20 files changed, 1743 insertions(+), 1410 deletions(-) create mode 100644 Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt delete mode 100644 Documentation/devicetree/bindings/display/hisilicon/hisi-drm.txt delete mode 100644 Documentation/devicetree/bindings/display/hisilicon/hisi-dsi.txt delete mode 100644 drivers/gpu/drm/hisilicon/kirin/hisi_drm_ade.h diff --git a/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt b/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt new file mode 100644 index 00000000000000..d270bfe4e4e072 --- /dev/null +++ b/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt @@ -0,0 +1,72 @@ +Device-Tree bindings for DesignWare DSI Host Controller v1.20a driver + +A DSI Host Controller resides in the middle of display controller and external +HDMI converter or panel. + +Required properties: +- compatible: value should be "hisilicon,hi6220-dsi". +- reg: physical base address and length of dsi controller's registers. +- clocks: contains APB clock phandle + clock-specifier pair. +- clock-names: should be "pclk". +- ports: contains DSI controller input and output sub port. + The input port connects to ADE output port with the reg value "0". + The output port with the reg value "1", it could connect to panel or + any other bridge endpoints. + See Documentation/devicetree/bindings/graph.txt for more device graph info. + +A example of HiKey board hi6220 SoC and board specific DT entry: +Example: + +SoC specific: + dsi: dsi@f4107800 { + compatible = "hisilicon,hi6220-dsi"; + reg = <0x0 0xf4107800 0x0 0x100>; + clocks = <&media_ctrl HI6220_DSI_PCLK>; + clock-names = "pclk"; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* 0 for input port */ + port@0 { + reg = <0>; + dsi_in: endpoint { + remote-endpoint = <&ade_out>; + }; + }; + }; + }; + + +Board specific: + &dsi { + status = "ok"; + + ports { + /* 1 for output port */ + port@1 { + reg = <1>; + + dsi_out0: endpoint@0 { + remote-endpoint = <&adv7533_in>; + }; + }; + }; + }; + + &i2c2 { + ... + + adv7533: adv7533@39 { + ... + + port { + adv7533_in: endpoint { + remote-endpoint = <&dsi_out0>; + }; + }; + }; + }; + diff --git a/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt b/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt index 2777a2cad7cd78..38dc9d60eef893 100644 --- a/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt +++ b/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt @@ -5,15 +5,31 @@ data from memory, do composition, do post image processing, generate RGB timing stream and transfer to DSI. Required properties: -- compatible: value should be one of the following - "hisilicon,hi6220-ade". -- reg: physical base address and length of the controller's registers. -- reg-names: name of physical base. -- interrupt: the interrupt number. -- clocks: the clocks needed. -- clock-names: the name of the clocks. -- ade_core_clk_rate: ADE core clock rate. -- media_noc_clk_rate: media noc module clock rate. +- compatible: value should be "hisilicon,hi6220-ade". +- reg: physical base address and length of the ADE controller's registers. +- hisilicon,noc-syscon: ADE NOC QoS syscon. +- resets: The ADE reset controller node. +- interrupt: the ldi vblank interrupt number used. +- clocks: a list of phandle + clock-specifier pairs, one for each entry + in clock-names. +- clock-names: should contain: + "clk_ade_core" for the ADE core clock. + "clk_codec_jpeg" for the media NOC QoS clock, which use the same clock with + jpeg codec. + "clk_ade_pix" for the ADE pixel clok. +- assigned-clocks: Should contain "clk_ade_core" and "clk_codec_jpeg" clocks' + phandle + clock-specifier pairs. +- assigned-clock-rates: clock rates, one for each entry in assigned-clocks. + The rate of "clk_ade_core" could be "360000000" or "180000000"; + The rate of "clk_codec_jpeg" could be or less than "1440000000". + These rate values could be configured according to performance and power + consumption. +- port: the output port. This contains one endpoint subnode, with its + remote-endpoint set to the phandle of the connected DSI input endpoint. + See Documentation/devicetree/bindings/graph.txt for more device graph info. + +Optional properties: +- dma-coherent: Present if dma operations are coherent. A example of HiKey board hi6220 SoC specific DT entry: @@ -21,22 +37,28 @@ Example: ade: ade@f4100000 { compatible = "hisilicon,hi6220-ade"; - reg = <0x0 0xf4100000 0x0 0x7800>, - <0x0 0xf4410000 0x0 0x1000>; - reg-names = "ade_base", - "media_base"; - interrupts = <0 115 4>; + reg = <0x0 0xf4100000 0x0 0x7800>; + reg-names = "ade_base"; + hisilicon,noc-syscon = <&medianoc_ade>; + resets = <&media_ctrl MEDIA_ADE>; + interrupts = <0 115 4>; /* ldi interrupt */ clocks = <&media_ctrl HI6220_ADE_CORE>, <&media_ctrl HI6220_CODEC_JPEG>, - <&media_ctrl HI6220_ADE_PIX_SRC>, - <&media_ctrl HI6220_PLL_SYS>, - <&media_ctrl HI6220_PLL_SYS_MEDIA>; + <&media_ctrl HI6220_ADE_PIX_SRC>; + /*clock name*/ clock-names = "clk_ade_core", - "aclk_codec_jpeg_src", - "clk_ade_pix", - "clk_syspll_src", - "clk_medpll_src"; - ade_core_clk_rate = <360000000>; - media_noc_clk_rate = <288000000>; + "clk_codec_jpeg", + "clk_ade_pix"; + + assigned-clocks = <&media_ctrl HI6220_ADE_CORE>, + <&media_ctrl HI6220_CODEC_JPEG>; + assigned-clock-rates = <360000000>, <288000000>; + dma-coherent; + + port { + ade_out: endpoint { + remote-endpoint = <&dsi_in>; + }; + }; }; diff --git a/Documentation/devicetree/bindings/display/hisilicon/hisi-drm.txt b/Documentation/devicetree/bindings/display/hisilicon/hisi-drm.txt deleted file mode 100644 index fd930260744f1c..00000000000000 --- a/Documentation/devicetree/bindings/display/hisilicon/hisi-drm.txt +++ /dev/null @@ -1,66 +0,0 @@ -Hisilicon DRM master device - -The Hisilicon DRM master device is a virtual device needed to list all -the other display relevant nodes that comprise the display subsystem. - - -Required properties: -- compatible: Should be "hisilicon,-dss" -- #address-cells: should be set to 2. -- #size-cells: should be set to 2. -- range: to allow probing of subdevices. - -Optional properties: -- dma-coherent: Present if dma operations are coherent. - -Required sub nodes: -All the device nodes of display subsystem of SoC should be the sub nodes. -Such as display controller node, DSI node and so on. - -A example of HiKey board hi6220 SoC specific DT entry: -Example: - - display-subsystem { - compatible = "hisilicon,hi6220-dss"; - #address-cells = <2>; - #size-cells = <2>; - ranges; - dma-coherent; - - ade: ade@f4100000 { - compatible = "hisilicon,hi6220-ade"; - reg = <0x0 0xf4100000 0x0 0x7800>, - <0x0 0xf4410000 0x0 0x1000>; - reg-names = "ade_base", - "media_base"; - interrupts = <0 115 4>; /* ldi interrupt */ - - clocks = <&media_ctrl HI6220_ADE_CORE>, - <&media_ctrl HI6220_CODEC_JPEG>, - <&media_ctrl HI6220_ADE_PIX_SRC>, - <&media_ctrl HI6220_PLL_SYS>, - <&media_ctrl HI6220_PLL_SYS_MEDIA>; - /*clock name*/ - clock-names = "clk_ade_core", - "aclk_codec_jpeg_src", - "clk_ade_pix", - "clk_syspll_src", - "clk_medpll_src"; - ade_core_clk_rate = <360000000>; - media_noc_clk_rate = <288000000>; - }; - - dsi: dsi@0xf4107800 { - compatible = "hisilicon,hi6220-dsi"; - reg = <0x0 0xf4107800 0x0 0x100>; - clocks = <&media_ctrl HI6220_DSI_PCLK>; - clock-names = "pclk_dsi"; - - port { - dsi_out: endpoint { - remote-endpoint = <&adv_in>; - }; - }; - - }; - }; diff --git a/Documentation/devicetree/bindings/display/hisilicon/hisi-dsi.txt b/Documentation/devicetree/bindings/display/hisilicon/hisi-dsi.txt deleted file mode 100644 index 30abaa853b00a0..00000000000000 --- a/Documentation/devicetree/bindings/display/hisilicon/hisi-dsi.txt +++ /dev/null @@ -1,53 +0,0 @@ -Device-Tree bindings for hisilicon DSI controller driver - -A DSI controller resides in the middle of display controller and external -HDMI converter. - -Required properties: -- compatible: value should be one of the following - "hisilicon,hi6220-dsi". -- reg: physical base address and length of the controller's registers. -- clocks: the clocks needed. -- clock-names: the name of the clocks. -- port: DSI controller output port. This contains one endpoint subnode, with its - remote-endpoint set to the phandle of the connected external HDMI endpoint. - See Documentation/devicetree/bindings/graph.txt for device graph info. - -A example of HiKey board hi6220 SoC and board specific DT entry: -Example: - -SoC specific: - dsi: dsi@0xf4107800 { - compatible = "hisilicon,hi6220-dsi"; - reg = <0x0 0xf4107800 0x0 0x100>; - clocks = <&media_ctrl HI6220_DSI_PCLK>; - clock-names = "pclk_dsi"; - - port { - dsi_out: endpoint { - remote-endpoint = <&adv_in>; - }; - }; - - }; - -Board specific: - i2c2: i2c@f7102000 { - status = "ok"; - - adv7533: adv7533@39 { - compatible = "adi,adv7533"; - reg = <0x39>; - interrupt-parent = <&gpio1>; - interrupts = <1 2>; - pd-gpio = <&gpio0 4 0>; - adi,dsi-lanes = <4>; - - port { - adv_in: endpoint { - remote-endpoint = <&dsi_out>; - }; - }; - }; - }; - diff --git a/MAINTAINERS b/MAINTAINERS index ab65bbecb15927..6e8cfea37fd5ca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3705,6 +3705,16 @@ S: Maintained F: drivers/gpu/drm/gma500 F: include/drm/gma500* +DRM DRIVERS FOR HISILICON +M: Xinliang Liu +R: Xinwei Kong +R: Chen Feng +L: dri-devel@lists.freedesktop.org +T: git git://github.com/xin3liang/linux.git +S: Maintained +F: drivers/gpu/drm/hisilicon/ +F: Documentation/devicetree/bindings/display/hisilicon/ + DRM DRIVERS FOR NVIDIA TEGRA M: Thierry Reding M: Terje Bergström diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 5d5e240a2b0077..c33e713f62e9fe 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -107,15 +107,15 @@ interrupts = <1 2>; pd-gpio = <&gpio0 4 0>; adi,dsi-lanes = <4>; + adi,disable-timing-generator; port { - adv_in: endpoint { - remote-endpoint = <&dsi_out>; + adv7533_in: endpoint { + remote-endpoint = <&dsi_out0>; }; }; }; }; - uart1: uart@f7111000 { status = "ok"; }; @@ -336,3 +336,55 @@ }; }; }; + +&ade { + status = "ok"; +}; + +&dsi { + #address-cells = <1>; + #size-cells = <0>; + mux-gpio = <&gpio0 1 0>; + status = "ok"; + + ports { + /* 1 for output port */ + port@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + + dsi_out0: endpoint@0 { + reg = <0>; + remote-endpoint = <&adv7533_in>; + }; + + dsi_out1: endpoint@1 { + reg = <1>; + remote-endpoint = <&panel0_in>; + }; + }; + }; + + /* For panel reg's value should >= 1 */ + panel@1 { + compatible = "innolux,n070icn-pb1"; + reg = <1>; + power-on-delay= <50>; + reset-delay = <100>; + init-delay = <100>; + panel-width-mm = <58>; + panel-height-mm = <103>; + pwr-en-gpio = <&gpio2 1 0>; + bl-en-gpio = <&gpio2 3 0>; + pwm-gpio = <&gpio12 7 0>; + + port { + panel0_in: endpoint { + remote-endpoint = <&dsi_out1>; + }; + }; + }; +}; + + diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 80aa50b95debef..09e2c71b5cb365 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -255,6 +255,7 @@ compatible = "hisilicon,hi6220-mediactrl", "syscon"; reg = <0x0 0xf4410000 0x0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pm_ctrl: pm_ctrl@f7032000 { @@ -271,6 +272,11 @@ mboxes = <&mailbox 1 0 11>; }; + medianoc_ade: medianoc_ade@f4520000 { + compatible = "syscon"; + reg = <0x0 0xf4520000 0x0 0x4000>; + }; + uart0: uart@f8015000 { /* console */ compatible = "arm,pl011", "arm,primecell"; reg = <0x0 0xf8015000 0x0 0x1000>; @@ -705,7 +711,7 @@ pinctrl-0 = <&spi0_pmx_func &spi0_cfg_func>; num-cs = <1>; cs-gpios = <&gpio6 2 0>; - status = "ok"; + status = "disabled"; }; i2c0: i2c@f7100000 { @@ -925,46 +931,51 @@ }; }; - display-subsystem { - compatible = "hisilicon,hi6220-dss"; - #address-cells = <2>; - #size-cells = <2>; - ranges; - - ade: ade@f4100000 { - compatible = "hisilicon,hi6220-ade"; - reg = <0x0 0xf4100000 0x0 0x7800>, - <0x0 0xf4410000 0x0 0x1000>, - <0x0 0xf4520000 0x0 0x1000>; - reg-names = "ade_base", - "media_base", - "media_noc_base"; - interrupts = <0 115 4>; /* ldi interrupt */ - - clocks = <&media_ctrl HI6220_ADE_CORE>, - <&media_ctrl HI6220_CODEC_JPEG>, - <&media_ctrl HI6220_ADE_PIX_SRC>, - <&media_ctrl HI6220_PLL_SYS>, - <&media_ctrl HI6220_PLL_SYS_MEDIA>; - /*clock name*/ - clock-names = "clk_ade_core", - "aclk_codec_jpeg_src", - "clk_ade_pix", - "clk_syspll_src", - "clk_medpll_src"; - ade_core_clk_rate = <360000000>; - media_noc_clk_rate = <288000000>; + ade: ade@f4100000 { + compatible = "hisilicon,hi6220-ade"; + reg = <0x0 0xf4100000 0x0 0x7800>; + reg-names = "ade_base"; + hisilicon,noc-syscon = <&medianoc_ade>; + resets = <&media_ctrl MEDIA_ADE>; + interrupts = <0 115 4>; /* ldi interrupt */ + + clocks = <&media_ctrl HI6220_ADE_CORE>, + <&media_ctrl HI6220_CODEC_JPEG>, + <&media_ctrl HI6220_ADE_PIX_SRC>; + /*clock name*/ + clock-names = "clk_ade_core", + "clk_codec_jpeg", + "clk_ade_pix"; + + assigned-clocks = <&media_ctrl HI6220_ADE_CORE>, + <&media_ctrl HI6220_CODEC_JPEG>; + assigned-clock-rates = <360000000>, <288000000>; + dma-coherent; + status = "disabled"; + + port { + ade_out: endpoint { + remote-endpoint = <&dsi_in>; + }; }; + }; + + dsi: dsi@f4107800 { + compatible = "hisilicon,hi6220-dsi"; + reg = <0x0 0xf4107800 0x0 0x100>; + clocks = <&media_ctrl HI6220_DSI_PCLK>; + clock-names = "pclk"; + status = "disabled"; - dsi: dsi@0xf4107800 { - compatible = "hisilicon,hi6220-dsi"; - reg = <0x0 0xf4107800 0x0 0x100>; - clocks = <&media_ctrl HI6220_DSI_PCLK>; - clock-names = "pclk_dsi"; + ports { + #address-cells = <1>; + #size-cells = <0>; - port { - dsi_out: endpoint { - remote-endpoint = <&adv_in>; + /* 0 for input port */ + port@0 { + reg = <0>; + dsi_in: endpoint { + remote-endpoint = <&ade_out>; }; }; }; diff --git a/arch/arm64/configs/hikey_defconfig b/arch/arm64/configs/hikey_defconfig index c14ca1c0d3d625..59b0627ad91f63 100644 --- a/arch/arm64/configs/hikey_defconfig +++ b/arch/arm64/configs/hikey_defconfig @@ -305,9 +305,7 @@ CONFIG_MALI_PLAT_SPECIFIC_DT=y CONFIG_DRM=y CONFIG_DRM_CMA_FBDEV_BUFFER_NUM=2 CONFIG_DRM_I2C_ADV7511=y -CONFIG_DRM_HISI=y CONFIG_DRM_HISI_KIRIN=y -CONFIG_HISI_KIRIN_DW_DSI=y CONFIG_FB_ARMCLCD=y CONFIG_FB_SIMPLE=y CONFIG_BACKLIGHT_LCD_SUPPORT=y diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index e7efcb7327f7a0..95ba48234f3e7c 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -75,4 +75,4 @@ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/ -obj-$(CONFIG_DRM_HISI) += hisilicon/ +obj-y += hisilicon/ diff --git a/drivers/gpu/drm/hisilicon/Kconfig b/drivers/gpu/drm/hisilicon/Kconfig index 7503981715027d..558c61b1b8e872 100644 --- a/drivers/gpu/drm/hisilicon/Kconfig +++ b/drivers/gpu/drm/hisilicon/Kconfig @@ -1,13 +1,3 @@ -config DRM_HISI - tristate "DRM Support for Hisilicon SoCs Platform" - depends on DRM - select DRM_KMS_HELPER - select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER - select DRM_MIPI_DSI - help - Choose this option if you have a hisilicon chipsets(hi6220). - If M is selected the module will be called hisi-drm. # # hisilicon drm device configuration. # Please keep this list sorted alphabetically diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig index 5dcbc8d8e313d6..57f6017f70c248 100644 --- a/drivers/gpu/drm/hisilicon/kirin/Kconfig +++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig @@ -4,6 +4,7 @@ config DRM_HISI_KIRIN select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_CMA_HELPER + select HISI_KIRIN_DW_DSI help Choose this option if you have a hisilicon Kirin chipsets(hi6220). If M is selected the module will be called kirin-drm. diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c index fe94ccbfb5eb19..aebb8fc27c56b9 100644 --- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c +++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c @@ -1,10 +1,12 @@ /* - * Hisilicon hi6220 SoC dsi driver + * DesignWare MIPI DSI Host Controller v1.02 driver + * + * Copyright (c) 2016 Linaro Limited. + * Copyright (c) 2014-2016 Hisilicon Limited. * - * Copyright (c) 2014-2015 Hisilicon Limited. * Author: - * Xinliang Liu * Xinliang Liu + * Xinliang Liu * Xinwei Kong * * This program is free software; you can redistribute it and/or modify @@ -16,27 +18,39 @@ #include #include #include +#include +#include