Files
kondors1995 2d5c481f29 bpf: squash revert spoofing and some backports:
Squashed commit of the following:

commit 259593385c05a430c4685b611c0e43b4272c22f8
Author: John Galt <johngaltfirstrun@gmail.com>
Date:   Fri Dec 13 08:30:37 2024 -0500

    bpf: squash revert spoofing and some backports:

    Squashed commit of the following:

    commit 8ac5df9c8bc9575059fff6cea0c40463b96fc129
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:17 2024 -0500

        Revert "BACKPORT: bpf: add skb_load_bytes_relative helper"

        This reverts commit 029893dcc5d67af16fdf0723bacaae37ec567f67.

    commit dbcbceafe848744ec188f74e87e9717916d359ea
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:13 2024 -0500

        Revert "BACKPORT: bpf: encapsulate verifier log state into a structure"

        This reverts commit d861145b97d247cbd9fe1400df52155f48639126.

    commit 478f4dfee0406b54525e68764cc9ba48af1624fc
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:10 2024 -0500

        Revert "BACKPORT: bpf: Rename bpf_verifer_log"

        This reverts commit 5d088635de1bf2d6ae9ea94e3dd1c601d30c0cce.

    commit 7bc7c24beb82168b49337530cb56b5dfeeafe19a
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:07 2024 -0500

        Revert "BACKPORT: bpf: btf: Introduce BPF Type Format (BTF)"

        This reverts commit 93d34e26514b4d9d15fd176706f57634b2e97485.

    commit 7106457ba90a459b6241fdd44df658c1b52c0e4b
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:03 2024 -0500

        Revert "bpf: Update logging functions to work with BTF"

        This reverts commit 97e6c528eb2f76c58a3b6a4c1e7fbeafcd97633a.

    commit 08e68c7ba56f5e78fd1afcd5a2164716a75b0fe3
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:58:00 2024 -0500

        Revert "bpf: btf: Validate type reference"

        This reverts commit c7b7eecbc1134e5d8865af2cc0692fc7156175d5.

    commit 7763cf0831970a64ed62f9b7362fca02ab6e83f1
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:51 2024 -0500

        Revert "bpf: btf: Check members of struct/union"

        This reverts commit 9a77b51cad6f04866ca067ca0e70a89b9f59ed56.

    commit eb033235f666b5f66995f4cf89702de7ab4721f8
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:47 2024 -0500

        Revert "bpf: btf: Add pretty print capability for data with BTF type info"

        This reverts commit 745692103435221d6e39bc177811769995540525.

    commit c32995674ace91e06c591d2f63177585e81adc75
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:43 2024 -0500

        Revert "BACKPORT: bpf: btf: Add BPF_BTF_LOAD command"

        This reverts commit 4e0afd38e20e5aa2df444361309bc07251ca6b2a.

    commit 1310bc8d4aca0015c8723e7624121eddf76b3244
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:38 2024 -0500

        Revert "bpf: btf: Add BPF_OBJ_GET_INFO_BY_FD support to BTF fd"

        This reverts commit d4b5d76d9101b97e6fe5181bcefe7f601ed19926.

    commit 881a49445608712bdb0a0f0c959838bdbc725f62
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:34 2024 -0500

        Revert "BACKPORT: bpf: btf: Clean up btf.h in uapi"

        This reverts commit 26b661822933d41b3feb59bb284334bfbbc82af4.

    commit e2109fd858ebd5fe392c8bf579b9350fbca35a35
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:29 2024 -0500

        Revert "bpf: btf: Avoid WARN_ON when CONFIG_REFCOUNT_FULL=y"

        This reverts commit 9abf878903404e649fef4ad0b189eec1c13d29fe.

    commit 088a7d9137f03da4e0fc1d72add3901823081ccd
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:23 2024 -0500

        Revert "bpf: Fix compiler warning on info.map_ids for 32bit platform"

        This reverts commit a3a278e1f6cf167d538ac52f4ad60bb9cf8d4129.

    commit 6e14aed6b63f2b266982454d83678445c062cf39
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:13 2024 -0500

        Revert "bpf: btf: Change how section is supported in btf_header"

        This reverts commit 4b60ffd683eb623a184b46761777838d7c49e707.

    commit 151a60855c23bf0317734031481d779efb369d6c
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:08 2024 -0500

        Revert "bpf: btf: Check array->index_type"

        This reverts commit b00e10f1a073fadce178b6fb62496722e16db303.

    commit 49775e9074a54ac5f60f518e6fc5a26172996eae
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:57:01 2024 -0500

        Revert "bpf: btf: Remove unused bits from uapi/linux/btf.h"

        This reverts commit c90c6ad34f7a8f565f351d21c2d5b9706838767d.

    commit b6d6c6ab28e4b018da6ce9e64125e63f4191d3d9
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:58 2024 -0500

        Revert "bpf: btf: Avoid variable length array"

        This reverts commit fe7d1f7750242e77a73839d173ac36c3e39d4171.

    commit a45bedecb9b1175fef96f2d64fba2d61777dbf35
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:49 2024 -0500

        Revert "bpf: btf: avoid -Wreturn-type warning"

        This reverts commit 78214f1e390bf1d69d9ae4ee80072ac85c34619e.

    commit 445efb8465b9fa5706d81098417f15656265322e
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:46 2024 -0500

        Revert "bpf: btf: Check array t->size"

        This reverts commit aed532e7466f77885a362e4b863bf90c41e834ba.

    commit 8aada590d525de735cf39196d88722e727c141e9
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:42 2024 -0500

        Revert "bpf: btf: Ensure t->type == 0 for BTF_KIND_FWD"

        This reverts commit 8c8b601dcc2e62e1276b73dfee8b49e40fb65944.

    commit ed67ad09e866c9c30897488088bbb4555ea3dc80
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:38 2024 -0500

        Revert "bpf: btf: Fix bitfield extraction for big endian"

        This reverts commit b0696a226c52868d64963f01665dd1a640a92f2b.

    commit 5cc64db782daf86cdf7ac77133ca94181bb29146
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:35 2024 -0500

        Revert "bpf: btf: Clean up BTF_INT_BITS() in uapi btf.h"

        This reverts commit 0f008594540b09c667ea88fc87cf289b8db334da.

    commit 3a5c6b9010426449c08ecdcc10e758431b1e515f
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:31 2024 -0500

        Revert "bpf: btf: Ensure the member->offset is in the right order"

        This reverts commit c5e361ecd6d45a7cdbffda02e4691a7a37198bdd.

    commit bd6173c1ac458b08d6cedaf06e6e53c93e6b0cc5
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:26 2024 -0500

        Revert "bpf: fix bpf_skb_load_bytes_relative pkt length check"

        This reverts commit 9ea14969874cd7896588df435c890f6f2f547821.

    commit 0b61d26b25a65d9ded4611426c6da9c78e41567c
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:22 2024 -0500

        Revert "bpf: btf: Fix end boundary calculation for type section"

        This reverts commit 08ef221c7fb604cb60c490fa999ec7254d492f05.

    commit 72fb2b9bb5b90f60ab71915fe4e57eeee3308163
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:18 2024 -0500

        Revert "bpf: btf: Fix a missing check bug"

        This reverts commit 594687e3e01e26086f3b0173e5eda9b9f0b672f8.

    commit 575a34ceba4013ad0230038f29f6ea0b3ba41a7e
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:15 2024 -0500

        Revert "bpf, btf: fix a missing check bug in btf_parse"

        This reverts commit 6bf31bbc438663756e92fb0aad4f5a35fd730fb0.

    commit bcca98c0bc5e19b38af3ddcd0feee80ad26e1f96
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:11 2024 -0500

        Revert "bpf: fix BTF limits"

        This reverts commit e351b26ae671dfacd82f27c1c5f66cf8089d930d.

    commit f71c484e340041d8828c94b39a233ea587d8cc09
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:07 2024 -0500

        Revert "bpf/btf: Fix BTF verification of enum members in struct/union"

        This reverts commit 861e65b744c171d59850e61a01715f194f25e45c.

    commit eca310722a2624d33cd49884aa18c36d435b10f8
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:56:02 2024 -0500

        Revert "bpf: btf: fix truncated last_member_type_id in btf_struct_resolve"

        This reverts commit d6cd1eac41b10e606ec7f445162a0617c01be973.

    commit caae5c99a3ca7bed0e318b31b6aa7ca8260a1c52
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:58 2024 -0500

        Revert "BACKPORT: net: bpf: rename ndo_xdp to ndo_bpf"

        This reverts commit 2a1ddcb6a384745195d57b4e4cdda2a55d2cbe47.

    commit f90bdcdaa095a4f10268bb740470a3e0893be21b
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:54 2024 -0500

        Revert "BACKPORT: bpf: offload: add infrastructure for loading programs for a specific netdev"

        This reverts commit a9516d402726094eafccce26a99cf5110d188be9.

    commit c6e0ce9019c06d9a45c030a2bc38eed320afd45a
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:50 2024 -0500

        Revert "bpf: offload: rename the ifindex field"

        This reverts commit 36bc9c7351a1dc78b3e71571998af381e876b4cb.

    commit 88b6a4d41b69df804b846a8ebdca410517e08343
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:46 2024 -0500

        Revert "BACKPORT: bpf: Check attach type at prog load time"

        This reverts commit fe5a0d514e4970d86983458136d4a2f6caeee365.

    commit 9ccfaa66a5ea042331f0aacdb3667e23c8ed363e
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:43 2024 -0500

        Revert "BACKPORT: bpf: introduce BPF_PROG_QUERY command"

        This reverts commit a5720688858170f1054f9549b5a628db1c252a88.

    commit adab2743b3fa0853d0351b33b0a286de745025e5
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:37 2024 -0500

        Revert "BACKPORT: bpf: Hooks for sys_bind"

        This reverts commit e484887c7e7aa026521ddc1773233368a6304b24.

    commit d462e09db98ad89b3a836f9b9a925812b0d8cfe7
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:33 2024 -0500

        Revert "BACKPORT: net: Introduce __inet_bind() and __inet6_bind"

        This reverts commit 41a3131c3e94c28fd084dd6f4358baee3824fd17.

    commit cdf7f55dc65b4bdf7ecfc924be77c6a039709b3d
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:29 2024 -0500

        Revert "BACKPORT: bpf: Hooks for sys_connect"

        This reverts commit f26fe7233e2885ef489707ab5a5a5dda9f081b80.

    commit 97685d5058f76ba4ea6dd2db157f4537f3a8953d
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:23 2024 -0500

        Revert "BACKPORT: bpf: Post-hooks for sys_bind"

        This reverts commit 284ac5bc7c70dac338301445e94e1ad40fb40fdb.

    commit d03d9c05036d3109eae643f473cc5a5ad0a80721
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:19 2024 -0500

        Revert "kernel: bpf: devmap: Create __dev_map_alloc_node"

        This reverts commit db726149fa9abfd1ca9add3e2db6b1524f7e90a3.

    commit 8c34bcb3e4c6630799764871b4af2e5f9344a371
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:15 2024 -0500

        Revert "BACKPORT: xdp: Add devmap_hash map type for looking up devices by hashed index"

        This reverts commit c4d4e1d201d8433e06b2ac66041d7105095a0204.

    commit ef277c7b3a08fd59943eb2b47af64afc513de008
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:11 2024 -0500

        Revert "BACKPORT: devmap: Allow map lookups from eBPF"

        This reverts commit 24d196375871c72de0de977de79afede5a7d1780.

    commit 4fcd87869c55c28ed59bff916d640147601816d2
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:07 2024 -0500

        Revert "gen_headers_{arm, arm64}: Add btf.h to the list"

        This reverts commit 37edfe7c90bac355885ffec3327b338a34619792.

    commit b89560e0b405b58ecc5fc12c15ad4f56147760d6
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:55:03 2024 -0500

        Revert "syscall: Fake uname to 4.19 for bpfloader/netd"

        This reverts commit 186e74af61269602d0c068d98928b1f25e03eba2.

    commit fd49f8c35eb7875d6810a5a52877ebc59bfd4530
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:54:59 2024 -0500

        Revert "syscall: Fake uname to 4.19 also for netbpfload"

        This reverts commit 34b9a1ab387d7dc83ede613b2c12b3741ea08edb.

    commit b853fcf2ff892664d0ff522ca7fd530bc94c023e
    Author: John Galt <johngaltfirstrun@gmail.com>
    Date:   Fri Dec 13 07:54:53 2024 -0500

        Revert "syscall: Increase bpf fake uname to 5.4"

        This reverts commit 9cdc014e11b410a7f03d8c968a35ee0dd6a28fff.

    # Conflicts:
    #	net/ipv4/af_inet.c
    #	net/ipv6/af_inet6.c

commit 4a0143fa36d300485650dc447b580151a69a3be2
Author: kondors1995 <normandija1945@gmail.com>
Date:   Wed Dec 18 13:48:16 2024 +0200

    Revert "syscall: Fake uname to 4.19 for bpfloader/netd"

    This reverts commit 417f37c97f.

commit 6f512c5c7341a51d7bbc9cdd93814764cae8868f
Author: kondors1995 <normandija1945@gmail.com>
Date:   Wed Dec 18 13:48:16 2024 +0200

    Revert "syscall: Fake uname to 4.19 also for netbpfload"

    This reverts commit a4c61c3d97.

commit 41f326616251f0122d81e518082ef7faaad4b2e5
Author: kondors1995 <normandija1945@gmail.com>
Date:   Wed Dec 18 13:48:15 2024 +0200

    Revert "syscall: Increase bpf fake uname to 5.4"

    This reverts commit 4a906017d4.

commit a0d3db72a836096cf533516d56c81a43150976ed
Author: kondors1995 <normandija1945@gmail.com>
Date:   Wed Dec 18 13:46:12 2024 +0200

    Revert "bpf: Hooks for sys_sendmsg"

    This reverts commit 735c155332.

commit 246eb3d90b95e0ab5aee8d5a9e9cd639c7beb174
Author: kondors1995 <normandija1945@gmail.com>
Date:   Wed Dec 18 13:45:08 2024 +0200

    Revert "syscall: Increase fake uname to 6.6.40"

    This reverts commit 92494b9920.

commit c56eaa5b7f170f58f2ade14bb71aaad2964b9018
Author: kondors1995 <normandija1945@gmail.com>
Date:   Mon Dec 9 21:35:20 2024 +0200

    raphael_defconfig: increase sbalance pooling rate to 10s

commit 54d190b8af
Author: Sultan Alsawaf <sultan@kerneltoast.com>
Date:   Wed Dec 4 15:53:22 2024 -0800

    sbalance: Fix severe misattribution of movable IRQs to the last active CPU

    Due to a horrible omission in the big IRQ list traversal, all movable IRQs
    are misattributed to the last active CPU in the system since that's what
    `bd` is last set to in the loop prior. This horribly breaks SBalance's
    notion of balance, producing nonsensical balancing decisions and failing to
    balance IRQs even when they are heavily imbalanced.

    Fix the massive breakage by adding the missing line of code to set `bd` to
    the CPU an IRQ actually belongs to, so that it's added to the correct CPU's
    movable IRQs list.

    Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

commit f2fa2db581
Author: Sultan Alsawaf <sultan@kerneltoast.com>
Date:   Wed Dec 4 14:31:52 2024 -0800

    sbalance: Don't race with CPU hotplug

    When a CPU is hotplugged, cpu_active_mask is modified without any RCU
    synchronization. As a result, the only synchronization for cpu_active_mask
    provided by the hotplug code is the CPU hotplug lock.

    Furthermore, since IRQ balance is majorly disrupted during CPU hotplug due
    to mass IRQ migration off a dying CPU, SBalance just shouldn't operate
    while a CPU hotplug is in progress.

    Take the CPU hotplug lock in balance_irqs() to prevent races and mishaps
    during CPU hotplugs.

    Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>

commit a4e81ff60a
Author: Sultan Alsawaf <sultan@kerneltoast.com>
Date:   Wed Dec 4 14:16:48 2024 -0800

    sbalance: Convert various IRQ counter types to unsigned ints

    These counted values are actually unsigned ints, not unsigned longs.
    Convert them to unsigned ints since there's no reason for them to be longs.

    Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
2024-12-19 17:34:31 +02:00

4459 lines
108 KiB
C

/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Routing netlink socket interface: protocol independent part.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/if_addr.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/switchdev.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#define RTNL_MAX_TYPE 48
#define RTNL_SLAVE_MAX_TYPE 36
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
unsigned int flags;
};
static DEFINE_MUTEX(rtnl_mutex);
void rtnl_lock(void)
{
mutex_lock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_lock);
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
if (head && tail) {
tail->next = defer_kfree_skb_list;
defer_kfree_skb_list = head;
}
}
EXPORT_SYMBOL(rtnl_kfree_skbs);
void __rtnl_unlock(void)
{
struct sk_buff *head = defer_kfree_skb_list;
defer_kfree_skb_list = NULL;
mutex_unlock(&rtnl_mutex);
while (head) {
struct sk_buff *next = head->next;
kfree_skb(head);
cond_resched();
head = next;
}
}
void rtnl_unlock(void)
{
/* This fellow will unlock it for us. */
netdev_run_todo();
}
EXPORT_SYMBOL(rtnl_unlock);
int rtnl_trylock(void)
{
return mutex_trylock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_trylock);
int rtnl_is_locked(void)
{
return mutex_is_locked(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_is_locked);
#ifdef CONFIG_PROVE_LOCKING
bool lockdep_rtnl_is_held(void)
{
return lockdep_is_held(&rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
int msgindex = msgtype - RTM_BASE;
/*
* msgindex < 0 implies someone tried to register a netlink
* control code. msgindex >= RTM_NR_MSGTYPES may indicate that
* the message type has not been added to linux/rtnetlink.h
*/
BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
return msgindex;
}
/**
* __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
* @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
* specified protocol family and message type is received.
*
* The special protocol family PF_UNSPEC may be used to define fallback
* function pointers for the case when no entry for the specific protocol
* family exists.
*
* Returns 0 on success or a negative error code.
*/
int __rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
struct rtnl_link *tab;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
if (tab == NULL) {
tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
if (tab == NULL)
return -ENOBUFS;
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
if (doit)
tab[msgindex].doit = doit;
if (dumpit)
tab[msgindex].dumpit = dumpit;
tab[msgindex].flags |= flags;
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_register);
/**
* rtnl_register - Register a rtnetlink message type
*
* Identical to __rtnl_register() but panics on failure. This is useful
* as failure of this function is very unlikely, it can only happen due
* to lack of memory when allocating the chain to store all message
* handlers for a protocol. Meant for use in init functions where lack
* of memory implies no sense in continuing.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
panic("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n",
protocol, msgtype);
}
EXPORT_SYMBOL_GPL(rtnl_register);
/**
* rtnl_unregister - Unregister a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
*
* Returns 0 on success or a negative error code.
*/
int rtnl_unregister(int protocol, int msgtype)
{
struct rtnl_link *handlers;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
if (!handlers) {
rtnl_unlock();
return -ENOENT;
}
handlers[msgindex].doit = NULL;
handlers[msgindex].dumpit = NULL;
handlers[msgindex].flags = 0;
rtnl_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_unregister);
/**
* rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
* @protocol : Protocol family or PF_UNSPEC
*
* Identical to calling rtnl_unregster() for all registered message types
* of a certain protocol family.
*/
void rtnl_unregister_all(int protocol)
{
struct rtnl_link *handlers;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
rtnl_unlock();
synchronize_net();
while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
schedule();
kfree(handlers);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
static LIST_HEAD(link_ops);
static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
{
const struct rtnl_link_ops *ops;
list_for_each_entry(ops, &link_ops, list) {
if (!strcmp(ops->kind, kind))
return ops;
}
return NULL;
}
/**
* __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
* @ops: struct rtnl_link_ops * to register
*
* The caller must hold the rtnl_mutex. This function should be used
* by drivers that create devices during module initialization. It
* must be called before registering the devices.
*
* Returns 0 on success or a negative error code.
*/
int __rtnl_link_register(struct rtnl_link_ops *ops)
{
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
/* The check for setup is here because if ops
* does not have that filled up, it is not possible
* to use the ops for creating device. So do not
* fill up dellink as well. That disables rtnl_dellink.
*/
if (ops->setup && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_link_register);
/**
* rtnl_link_register - Register rtnl_link_ops with rtnetlink.
* @ops: struct rtnl_link_ops * to register
*
* Returns 0 on success or a negative error code.
*/
int rtnl_link_register(struct rtnl_link_ops *ops)
{
int err;
/* Sanity-check max sizes to avoid stack buffer overflow. */
if (WARN_ON(ops->maxtype > RTNL_MAX_TYPE ||
ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE))
return -EINVAL;
rtnl_lock();
err = __rtnl_link_register(ops);
rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(rtnl_link_register);
static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
{
struct net_device *dev;
LIST_HEAD(list_kill);
for_each_netdev(net, dev) {
if (dev->rtnl_link_ops == ops)
ops->dellink(dev, &list_kill);
}
unregister_netdevice_many(&list_kill);
}
/**
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*
* The caller must hold the rtnl_mutex.
*/
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
struct net *net;
for_each_net(net) {
__rtnl_kill_links(net, ops);
}
list_del(&ops->list);
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
/* Return with the rtnl_lock held when there are no network
* devices unregistering in any network namespace.
*/
static void rtnl_lock_unregistering_all(void)
{
struct net *net;
bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
unregistering = false;
rtnl_lock();
for_each_net(net) {
if (net->dev_unreg_count > 0) {
unregistering = true;
break;
}
}
if (!unregistering)
break;
__rtnl_unlock();
wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
remove_wait_queue(&netdev_unregistering_wq, &wait);
}
/**
* rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
/* Close the race with cleanup_net() */
mutex_lock(&net_mutex);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
{
struct net_device *master_dev;
const struct rtnl_link_ops *ops;
size_t size = 0;
rcu_read_lock();
master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (!master_dev)
goto out;
ops = master_dev->rtnl_link_ops;
if (!ops || !ops->get_slave_size)
goto out;
/* IFLA_INFO_SLAVE_DATA + nested data */
size = nla_total_size(sizeof(struct nlattr)) +
ops->get_slave_size(master_dev, dev);
out:
rcu_read_unlock();
return size;
}
static size_t rtnl_link_get_size(const struct net_device *dev)
{
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
size_t size;
if (!ops)
return 0;
size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
if (ops->get_size)
/* IFLA_INFO_DATA + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
ops->get_size(dev);
if (ops->get_xstats_size)
/* IFLA_INFO_XSTATS */
size += nla_total_size(ops->get_xstats_size(dev));
size += rtnl_link_get_slave_info_data_size(dev);
return size;
}
static LIST_HEAD(rtnl_af_ops);
static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
list_for_each_entry(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
return NULL;
}
/**
* rtnl_af_register - Register rtnl_af_ops with rtnetlink.
* @ops: struct rtnl_af_ops * to register
*
* Returns 0 on success or a negative error code.
*/
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
list_add_tail(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
* __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*
* The caller must hold the rtnl_mutex.
*/
void __rtnl_af_unregister(struct rtnl_af_ops *ops)
{
list_del(&ops->list);
}
EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
__rtnl_af_unregister(ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
static size_t rtnl_link_get_af_size(const struct net_device *dev,
u32 ext_filter_mask)
{
struct rtnl_af_ops *af_ops;
size_t size;
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
list_for_each_entry(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
return size;
}
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
if (master_dev && master_dev->rtnl_link_ops)
return true;
return false;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
const struct net_device *dev)
{
struct net_device *master_dev;
const struct rtnl_link_ops *ops;
struct nlattr *slave_data;
int err;
master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
if (!master_dev)
return 0;
ops = master_dev->rtnl_link_ops;
if (!ops)
return 0;
if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0)
return -EMSGSIZE;
if (ops->fill_slave_info) {
slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA);
if (!slave_data)
return -EMSGSIZE;
err = ops->fill_slave_info(skb, master_dev, dev);
if (err < 0)
goto err_cancel_slave_data;
nla_nest_end(skb, slave_data);
}
return 0;
err_cancel_slave_data:
nla_nest_cancel(skb, slave_data);
return err;
}
static int rtnl_link_info_fill(struct sk_buff *skb,
const struct net_device *dev)
{
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
struct nlattr *data;
int err;
if (!ops)
return 0;
if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
return -EMSGSIZE;
if (ops->fill_xstats) {
err = ops->fill_xstats(skb, dev);
if (err < 0)
return err;
}
if (ops->fill_info) {
data = nla_nest_start(skb, IFLA_INFO_DATA);
if (data == NULL)
return -EMSGSIZE;
err = ops->fill_info(skb, dev);
if (err < 0)
goto err_cancel_data;
nla_nest_end(skb, data);
}
return 0;
err_cancel_data:
nla_nest_cancel(skb, data);
return err;
}
static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
{
struct nlattr *linkinfo;
int err = -EMSGSIZE;
linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
if (linkinfo == NULL)
goto out;
err = rtnl_link_info_fill(skb, dev);
if (err < 0)
goto err_cancel_link;
err = rtnl_link_slave_info_fill(skb, dev);
if (err < 0)
goto err_cancel_link;
nla_nest_end(skb, linkinfo);
return 0;
err_cancel_link:
nla_nest_cancel(skb, linkinfo);
out:
return err;
}
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
struct sock *rtnl = net->rtnl;
int err = 0;
NETLINK_CB(skb).dst_group = group;
if (echo)
refcount_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
return err;
}
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
{
struct sock *rtnl = net->rtnl;
return nlmsg_unicast(rtnl, skb, pid);
}
EXPORT_SYMBOL(rtnl_unicast);
void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
struct sock *rtnl = net->rtnl;
int report = 0;
if (nlh)
report = nlmsg_report(nlh);
nlmsg_notify(rtnl, skb, pid, group, report, flags);
}
EXPORT_SYMBOL(rtnl_notify);
void rtnl_set_sk_err(struct net *net, u32 group, int error)
{
struct sock *rtnl = net->rtnl;
netlink_set_err(rtnl, 0, group, error);
}
EXPORT_SYMBOL(rtnl_set_sk_err);
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
struct nlattr *mx;
int i, valid = 0;
mx = nla_nest_start(skb, RTA_METRICS);
if (mx == NULL)
return -ENOBUFS;
for (i = 0; i < RTAX_MAX; i++) {
if (metrics[i]) {
if (i == RTAX_CC_ALGO - 1) {
char tmp[TCP_CA_NAME_MAX], *name;
name = tcp_ca_get_name_by_key(metrics[i], tmp);
if (!name)
continue;
if (nla_put_string(skb, i + 1, name))
goto nla_put_failure;
} else if (i == RTAX_FEATURES - 1) {
u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
if (!user_features)
continue;
BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
if (nla_put_u32(skb, i + 1, user_features))
goto nla_put_failure;
} else {
if (nla_put_u32(skb, i + 1, metrics[i]))
goto nla_put_failure;
}
valid++;
}
}
if (!valid) {
nla_nest_cancel(skb, mx);
return 0;
}
return nla_nest_end(skb, mx);
nla_put_failure:
nla_nest_cancel(skb, mx);
return -EMSGSIZE;
}
EXPORT_SYMBOL(rtnetlink_put_metrics);
int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
};
if (expires) {
unsigned long clock;
clock = jiffies_to_clock_t(abs(expires));
clock = min_t(unsigned long, clock, INT_MAX);
ci.rta_expires = (expires > 0) ? clock : -clock;
}
return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
static void set_operstate(struct net_device *dev, unsigned char transition)
{
unsigned char operstate = dev->operstate;
switch (transition) {
case IF_OPER_UP:
if ((operstate == IF_OPER_DORMANT ||
operstate == IF_OPER_UNKNOWN) &&
!netif_dormant(dev))
operstate = IF_OPER_UP;
break;
case IF_OPER_DORMANT:
if (operstate == IF_OPER_UP ||
operstate == IF_OPER_UNKNOWN)
operstate = IF_OPER_DORMANT;
break;
}
if (dev->operstate != operstate) {
write_lock_bh(&dev_base_lock);
dev->operstate = operstate;
write_unlock_bh(&dev_base_lock);
netdev_state_change(dev);
}
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
{
return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) |
(dev->gflags & (IFF_PROMISC | IFF_ALLMULTI));
}
static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
const struct ifinfomsg *ifm)
{
unsigned int flags = ifm->ifi_flags;
/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
if (ifm->ifi_change)
flags = (flags & ifm->ifi_change) |
(rtnl_dev_get_flags(dev) & ~ifm->ifi_change);
return flags;
}
static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
const struct rtnl_link_stats64 *b)
{
a->rx_packets = b->rx_packets;
a->tx_packets = b->tx_packets;
a->rx_bytes = b->rx_bytes;
a->tx_bytes = b->tx_bytes;
a->rx_errors = b->rx_errors;
a->tx_errors = b->tx_errors;
a->rx_dropped = b->rx_dropped;
a->tx_dropped = b->tx_dropped;
a->multicast = b->multicast;
a->collisions = b->collisions;
a->rx_length_errors = b->rx_length_errors;
a->rx_over_errors = b->rx_over_errors;
a->rx_crc_errors = b->rx_crc_errors;
a->rx_frame_errors = b->rx_frame_errors;
a->rx_fifo_errors = b->rx_fifo_errors;
a->rx_missed_errors = b->rx_missed_errors;
a->tx_aborted_errors = b->tx_aborted_errors;
a->tx_carrier_errors = b->tx_carrier_errors;
a->tx_fifo_errors = b->tx_fifo_errors;
a->tx_heartbeat_errors = b->tx_heartbeat_errors;
a->tx_window_errors = b->tx_window_errors;
a->rx_compressed = b->rx_compressed;
a->tx_compressed = b->tx_compressed;
a->rx_nohandler = b->rx_nohandler;
}
/* All VF info */
static inline int rtnl_vfinfo_size(const struct net_device *dev,
u32 ext_filter_mask)
{
if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) {
int num_vfs = dev_num_vf(dev->dev.parent);
size_t size = nla_total_size(0);
size += num_vfs *
(nla_total_size(0) +
nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
nla_total_size(MAX_VLAN_LIST_LEN *
sizeof(struct ifla_vf_vlan_info)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
nla_total_size(sizeof(struct ifla_vf_link_state)) +
nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
nla_total_size(0) + /* nest IFLA_VF_STATS */
/* IFLA_VF_STATS_RX_PACKETS */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_PACKETS */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_RX_BYTES */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_BYTES */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_BROADCAST */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
return 0;
}
static size_t rtnl_port_size(const struct net_device *dev,
u32 ext_filter_mask)
{
size_t port_size = nla_total_size(4) /* PORT_VF */
+ nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
+ nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */
+ nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */
+ nla_total_size(1) /* PROT_VDP_REQUEST */
+ nla_total_size(2); /* PORT_VDP_RESPONSE */
size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+ port_size;
size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+ port_size;
if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
!(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
if (dev_num_vf(dev->dev.parent))
return port_self_size + vf_ports_size +
vf_port_size * dev_num_vf(dev->dev.parent);
else
return port_self_size;
}
static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
nla_total_size(1) + /* XDP_ATTACHED */
nla_total_size(4); /* XDP_PROG_ID */
return xdp_size;
}
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+ nla_total_size_64bit(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
+ nla_total_size_64bit(sizeof(struct rtnl_link_stats64))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ nla_total_size(4) /* IFLA_TXQLEN */
+ nla_total_size(4) /* IFLA_WEIGHT */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
+ nla_total_size(4) /* IFLA_MASTER */
+ nla_total_size(1) /* IFLA_CARRIER */
+ nla_total_size(4) /* IFLA_PROMISCUITY */
+ nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
+ nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(4) /* IFLA_LINK_NETNSID */
+ nla_total_size(4) /* IFLA_GROUP */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
{
struct nlattr *vf_ports;
struct nlattr *vf_port;
int vf;
int err;
vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
if (!vf_ports)
return -EMSGSIZE;
for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
vf_port = nla_nest_start(skb, IFLA_VF_PORT);
if (!vf_port)
goto nla_put_failure;
if (nla_put_u32(skb, IFLA_PORT_VF, vf))
goto nla_put_failure;
err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
if (err == -EMSGSIZE)
goto nla_put_failure;
if (err) {
nla_nest_cancel(skb, vf_port);
continue;
}
nla_nest_end(skb, vf_port);
}
nla_nest_end(skb, vf_ports);
return 0;
nla_put_failure:
nla_nest_cancel(skb, vf_ports);
return -EMSGSIZE;
}
static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
{
struct nlattr *port_self;
int err;
port_self = nla_nest_start(skb, IFLA_PORT_SELF);
if (!port_self)
return -EMSGSIZE;
err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
if (err) {
nla_nest_cancel(skb, port_self);
return (err == -EMSGSIZE) ? err : 0;
}
nla_nest_end(skb, port_self);
return 0;
}
static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
u32 ext_filter_mask)
{
int err;
if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
!(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
err = rtnl_port_self_fill(skb, dev);
if (err)
return err;
if (dev_num_vf(dev->dev.parent)) {
err = rtnl_vf_ports_fill(skb, dev);
if (err)
return err;
}
return 0;
}
static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
struct netdev_phys_item_id ppid;
err = dev_get_phys_port_id(dev, &ppid);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
return -EMSGSIZE;
return 0;
}
static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
{
char name[IFNAMSIZ];
int err;
err = dev_get_phys_port_name(dev, name, sizeof(name));
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
return -EMSGSIZE;
return 0;
}
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
struct switchdev_attr attr = {
.orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
err = switchdev_port_attr_get(dev, &attr);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
attr.u.ppid.id))
return -EMSGSIZE;
return 0;
}
static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
struct net_device *dev)
{
struct rtnl_link_stats64 *sp;
struct nlattr *attr;
attr = nla_reserve_64bit(skb, IFLA_STATS64,
sizeof(struct rtnl_link_stats64), IFLA_PAD);
if (!attr)
return -EMSGSIZE;
sp = nla_data(attr);
dev_get_stats(dev, sp);
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (!attr)
return -EMSGSIZE;
copy_rtnl_link_stats(nla_data(attr), sp);
return 0;
}
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
struct nlattr *vfinfo)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
struct nlattr *vf, *vfstats, *vfvlanlist;
struct ifla_vf_link_state vf_linkstate;
struct ifla_vf_vlan_info vf_vlan_info;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_stats vf_stats;
struct ifla_vf_trust vf_trust;
struct ifla_vf_vlan vf_vlan;
struct ifla_vf_rate vf_rate;
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
memset(&ivi, 0, sizeof(ivi));
/* Not all SR-IOV capable drivers support the
* spoofcheck and "RSS query enable" query. Preset to
* -1 so the user space tool can detect that the driver
* didn't report anything.
*/
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
ivi.trusted = -1;
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
ivi.linkstate = 0;
/* VLAN Protocol by default is 802.1Q */
ivi.vlan_proto = htons(ETH_P_8021Q);
if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
return 0;
memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
vf_mac.vf =
vf_vlan.vf =
vf_vlan_info.vf =
vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf =
vf_rss_query_en.vf =
vf_trust.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
vf_vlan_info.vlan = ivi.vlan;
vf_vlan_info.qos = ivi.qos;
vf_vlan_info.vlan_proto = ivi.vlan_proto;
vf_tx_rate.rate = ivi.max_tx_rate;
vf_rate.min_tx_rate = ivi.min_tx_rate;
vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf_rss_query_en.setting = ivi.rss_query_en;
vf_trust.setting = ivi.trusted;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf)
goto nla_put_vfinfo_failure;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
&vf_rate) ||
nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
&vf_tx_rate) ||
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
&vf_spoofchk) ||
nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
&vf_linkstate) ||
nla_put(skb, IFLA_VF_RSS_QUERY_EN,
sizeof(vf_rss_query_en),
&vf_rss_query_en) ||
nla_put(skb, IFLA_VF_TRUST,
sizeof(vf_trust), &vf_trust))
goto nla_put_vf_failure;
vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
if (!vfvlanlist)
goto nla_put_vf_failure;
if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
&vf_vlan_info)) {
nla_nest_cancel(skb, vfvlanlist);
goto nla_put_vf_failure;
}
nla_nest_end(skb, vfvlanlist);
memset(&vf_stats, 0, sizeof(vf_stats));
if (dev->netdev_ops->ndo_get_vf_stats)
dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
&vf_stats);
vfstats = nla_nest_start(skb, IFLA_VF_STATS);
if (!vfstats)
goto nla_put_vf_failure;
if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
vf_stats.multicast, IFLA_VF_STATS_PAD)) {
nla_nest_cancel(skb, vfstats);
goto nla_put_vf_failure;
}
nla_nest_end(skb, vfstats);
nla_nest_end(skb, vf);
return 0;
nla_put_vf_failure:
nla_nest_cancel(skb, vf);
nla_put_vfinfo_failure:
nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
memset(&map, 0, sizeof(map));
map.mem_start = dev->mem_start;
map.mem_end = dev->mem_end;
map.base_addr = dev->base_addr;
map.irq = dev->irq;
map.dma = dev->dma;
map.port = dev->if_port;
if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
return 0;
}
static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
ASSERT_RTNL();
*prog_id = 0;
generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
if (generic_xdp_prog) {
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
if (!ops->ndo_xdp)
return XDP_ATTACHED_NONE;
return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
struct nlattr *xdp;
u32 prog_id;
int err;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
rtnl_xdp_attached_mode(dev, &prog_id));
if (err)
goto err_cancel;
if (prog_id) {
err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
if (err)
goto err_cancel;
}
nla_nest_end(skb, xdp);
return 0;
err_cancel:
nla_nest_cancel(skb, xdp);
return err;
}
static u32 rtnl_get_event(unsigned long event)
{
u32 rtnl_event_type = IFLA_EVENT_NONE;
switch (event) {
case NETDEV_REBOOT:
rtnl_event_type = IFLA_EVENT_REBOOT;
break;
case NETDEV_FEAT_CHANGE:
rtnl_event_type = IFLA_EVENT_FEATURES;
break;
case NETDEV_BONDING_FAILOVER:
rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER;
break;
case NETDEV_NOTIFY_PEERS:
rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS;
break;
case NETDEV_RESEND_IGMP:
rtnl_event_type = IFLA_EVENT_IGMP_RESEND;
break;
case NETDEV_CHANGEINFODATA:
rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS;
break;
default:
break;
}
return rtnl_event_type;
}
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
u32 event)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
struct nlattr *af_spec;
struct rtnl_af_ops *af_ops;
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
return -EMSGSIZE;
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type;
ifm->ifi_index = dev->ifindex;
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
nla_put_u32(skb, IFLA_GROUP, dev->group) ||
nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
(dev->ifindex != dev_get_iflink(dev) &&
nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
(upper_dev &&
nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
(dev->ifalias &&
nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_changes)) ||
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
if (event != IFLA_EVENT_NONE) {
if (nla_put_u32(skb, IFLA_EVENT, event))
goto nla_put_failure;
}
if (rtnl_fill_link_ifmap(skb, dev))
goto nla_put_failure;
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
goto nla_put_failure;
}
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
if (rtnl_phys_port_name_fill(skb, dev))
goto nla_put_failure;
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
goto nla_put_failure;
if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
ext_filter_mask & RTEXT_FILTER_VF) {
int i;
struct nlattr *vfinfo;
int num_vfs = dev_num_vf(dev->dev.parent);
vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
if (!vfinfo)
goto nla_put_failure;
for (i = 0; i < num_vfs; i++) {
if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
goto nla_put_failure;
}
nla_nest_end(skb, vfinfo);
}
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
if (rtnl_xdp_fill(skb, dev))
goto nla_put_failure;
if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
if (rtnl_link_fill(skb, dev) < 0)
goto nla_put_failure;
}
if (dev->rtnl_link_ops &&
dev->rtnl_link_ops->get_link_net) {
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
if (!net_eq(dev_net(dev), link_net)) {
int id = peernet2id_alloc(dev_net(dev), link_net);
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
goto nla_put_failure;
}
}
if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
goto nla_put_failure;
list_for_each_entry(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_link_af) {
struct nlattr *af;
int err;
if (!(af = nla_nest_start(skb, af_ops->family)))
goto nla_put_failure;
err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
/*
* Caller may return ENODATA to indicate that there
* was no data to be dumped. This is not an error, it
* means we should trim the attribute header and
* continue.
*/
if (err == -ENODATA)
nla_nest_cancel(skb, af);
else if (err < 0)
goto nla_put_failure;
nla_nest_end(skb, af);
}
}
nla_nest_end(skb, af_spec);
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
[IFLA_MTU] = { .type = NLA_U32 },
[IFLA_LINK] = { .type = NLA_U32 },
[IFLA_MASTER] = { .type = NLA_U32 },
[IFLA_CARRIER] = { .type = NLA_U8 },
[IFLA_TXQLEN] = { .type = NLA_U32 },
[IFLA_WEIGHT] = { .type = NLA_U32 },
[IFLA_OPERSTATE] = { .type = NLA_U8 },
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
[IFLA_NET_NS_FD] = { .type = NLA_U32 },
/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
* allow 0-length string (needed to remove an alias).
*/
[IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
[IFLA_AF_SPEC] = { .type = NLA_NESTED },
[IFLA_EXT_MASK] = { .type = NLA_U32 },
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_KIND] = { .type = NLA_STRING },
[IFLA_INFO_DATA] = { .type = NLA_NESTED },
[IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING },
[IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
};
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
[IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED },
[IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) },
[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
[IFLA_VF_STATS] = { .type = NLA_NESTED },
[IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
[IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
[IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_VF] = { .type = NLA_U32 },
[IFLA_PORT_PROFILE] = { .type = NLA_STRING,
.len = PORT_PROFILE_MAX },
[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
.len = PORT_UUID_MAX },
[IFLA_PORT_HOST_UUID] = { .type = NLA_STRING,
.len = PORT_UUID_MAX },
[IFLA_PORT_REQUEST] = { .type = NLA_U8, },
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
/* Unused, but we need to keep it here since user space could
* fill it. It's also broken with regard to NLA_BINARY use in
* combination with structs.
*/
[IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY,
.len = sizeof(struct ifla_port_vsi) },
};
static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
[IFLA_XDP_FD] = { .type = NLA_S32 },
[IFLA_XDP_ATTACHED] = { .type = NLA_U8 },
[IFLA_XDP_FLAGS] = { .type = NLA_U32 },
[IFLA_XDP_PROG_ID] = { .type = NLA_U32 },
};
static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
{
const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla,
ifla_info_policy, NULL) < 0)
return NULL;
if (linfo[IFLA_INFO_KIND]) {
char kind[MODULE_NAME_LEN];
nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
ops = rtnl_link_ops_get(kind);
}
return ops;
}
static bool link_master_filtered(struct net_device *dev, int master_idx)
{
struct net_device *master;
if (!master_idx)
return false;
master = netdev_master_upper_dev_get(dev);
if (!master || master->ifindex != master_idx)
return true;
return false;
}
static bool link_kind_filtered(const struct net_device *dev,
const struct rtnl_link_ops *kind_ops)
{
if (kind_ops && dev->rtnl_link_ops != kind_ops)
return true;
return false;
}
static bool link_dump_filtered(struct net_device *dev,
int master_idx,
const struct rtnl_link_ops *kind_ops)
{
if (link_master_filtered(dev, master_idx) ||
link_kind_filtered(dev, kind_ops))
return true;
return false;
}
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
int h, s_h;
int idx = 0, s_idx;
struct net_device *dev;
struct hlist_head *head;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
const struct rtnl_link_ops *kind_ops = NULL;
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
int err;
int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
/* A hack to preserve kernel<->userspace interface.
* The correct header is ifinfomsg. It is consistent with rtnl_getlink.
* However, before Linux v3.9 the code here assumed rtgenmsg and that's
* what iproute2 < v3.9.0 used.
* We can detect the old iproute2. Even including the IFLA_EXT_MASK
* attribute, its netlink message is shorter than struct ifinfomsg.
*/
hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
if (tb[IFLA_MASTER])
master_idx = nla_get_u32(tb[IFLA_MASTER]);
if (tb[IFLA_LINKINFO])
kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
if (master_idx || kind_ops)
flags |= NLM_F_DUMP_FILTERED;
}
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
goto cont;
if (idx < s_idx)
goto cont;
err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
ext_filter_mask, 0);
if (err < 0) {
if (likely(skb->len))
goto out;
goto out_err;
}
cont:
idx++;
}
}
out:
err = skb->len;
out_err:
cb->args[1] = idx;
cb->args[0] = h;
cb->seq = net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
}
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
struct netlink_ext_ack *exterr)
{
return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr);
}
EXPORT_SYMBOL(rtnl_nla_parse_ifla);
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
struct net *net;
/* Examine the link attributes and figure out which
* network namespace we are talking about.
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
else if (tb[IFLA_NET_NS_FD])
net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
}
EXPORT_SYMBOL(rtnl_link_get_net);
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
{
if (dev) {
if (tb[IFLA_ADDRESS] &&
nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
return -EINVAL;
if (tb[IFLA_BROADCAST] &&
nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
return -EINVAL;
}
if (tb[IFLA_AF_SPEC]) {
struct nlattr *af;
int rem, err;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
if (!(af_ops = rtnl_af_lookup(nla_type(af))))
return -EAFNOSUPPORT;
if (!af_ops->set_link_af)
return -EOPNOTSUPP;
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
if (err < 0)
return err;
}
}
}
return 0;
}
static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
int guid_type)
{
const struct net_device_ops *ops = dev->netdev_ops;
return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
}
static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
{
if (dev->type != ARPHRD_INFINIBAND)
return -EOPNOTSUPP;
return handle_infiniband_guid(dev, ivt, guid_type);
}
static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err = -EINVAL;
if (tb[IFLA_VF_MAC]) {
struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
if (ivm->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_mac)
err = ops->ndo_set_vf_mac(dev, ivm->vf,
ivm->mac);
if (err < 0)
return err;
}
if (tb[IFLA_VF_VLAN]) {
struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
if (ivv->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_vlan)
err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
ivv->qos,
htons(ETH_P_8021Q));
if (err < 0)
return err;
}
if (tb[IFLA_VF_VLAN_LIST]) {
struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
struct nlattr *attr;
int rem, len = 0;
err = -EOPNOTSUPP;
if (!ops->ndo_set_vf_vlan)
return err;
nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
return -EINVAL;
}
if (len >= MAX_VLAN_LIST_LEN)
return -EOPNOTSUPP;
ivvl[len] = nla_data(attr);
len++;
}
if (len == 0)
return -EINVAL;
if (ivvl[0]->vf >= INT_MAX)
return -EINVAL;
err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
ivvl[0]->qos, ivvl[0]->vlan_proto);
if (err < 0)
return err;
}
if (tb[IFLA_VF_TX_RATE]) {
struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
struct ifla_vf_info ivf;
if (ivt->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_get_vf_config)
err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
if (err < 0)
return err;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_rate)
err = ops->ndo_set_vf_rate(dev, ivt->vf,
ivf.min_tx_rate,
ivt->rate);
if (err < 0)
return err;
}
if (tb[IFLA_VF_RATE]) {
struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
if (ivt->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_rate)
err = ops->ndo_set_vf_rate(dev, ivt->vf,
ivt->min_tx_rate,
ivt->max_tx_rate);
if (err < 0)
return err;
}
if (tb[IFLA_VF_SPOOFCHK]) {
struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
if (ivs->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_spoofchk)
err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
ivs->setting);
if (err < 0)
return err;
}
if (tb[IFLA_VF_LINK_STATE]) {
struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
if (ivl->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_link_state)
err = ops->ndo_set_vf_link_state(dev, ivl->vf,
ivl->link_state);
if (err < 0)
return err;
}
if (tb[IFLA_VF_RSS_QUERY_EN]) {
struct ifla_vf_rss_query_en *ivrssq_en;
err = -EOPNOTSUPP;
ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
if (ivrssq_en->vf >= INT_MAX)
return -EINVAL;
if (ops->ndo_set_vf_rss_query_en)
err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
ivrssq_en->setting);
if (err < 0)
return err;
}
if (tb[IFLA_VF_TRUST]) {
struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
if (ivt->vf >= INT_MAX)
return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_trust)
err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
if (err < 0)
return err;
}
if (tb[IFLA_VF_IB_NODE_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
if (ivt->vf >= INT_MAX)
return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
}
if (tb[IFLA_VF_IB_PORT_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
if (ivt->vf >= INT_MAX)
return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
}
return err;
}
static int do_set_master(struct net_device *dev, int ifindex)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
int err;
if (upper_dev) {
if (upper_dev->ifindex == ifindex)
return 0;
ops = upper_dev->netdev_ops;
if (ops->ndo_del_slave) {
err = ops->ndo_del_slave(upper_dev, dev);
if (err)
return err;
} else {
return -EOPNOTSUPP;
}
}
if (ifindex) {
upper_dev = __dev_get_by_index(dev_net(dev), ifindex);
if (!upper_dev)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
err = ops->ndo_add_slave(upper_dev, dev);
if (err)
return err;
} else {
return -EOPNOTSUPP;
}
}
return 0;
}
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
struct nlattr **tb, char *ifname, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
err = validate_linkmsg(dev, tb);
if (err < 0)
return err;
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
goto errout;
}
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
err = -EPERM;
goto errout;
}
err = dev_change_net_namespace(dev, net, ifname);
put_net(net);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
if (!ops->ndo_set_config) {
err = -EOPNOTSUPP;
goto errout;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto errout;
}
u_map = nla_data(tb[IFLA_MAP]);
k_map.mem_start = (unsigned long) u_map->mem_start;
k_map.mem_end = (unsigned long) u_map->mem_end;
k_map.base_addr = (unsigned short) u_map->base_addr;
k_map.irq = (unsigned char) u_map->irq;
k_map.dma = (unsigned char) u_map->dma;
k_map.port = (unsigned char) u_map->port;
err = ops->ndo_set_config(dev, &k_map);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_ADDRESS]) {
struct sockaddr *sa;
int len;
len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
sizeof(*sa));
sa = kmalloc(len, GFP_KERNEL);
if (!sa) {
err = -ENOMEM;
goto errout;
}
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
err = dev_set_mac_address(dev, sa);
kfree(sa);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
if (err < 0)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_GROUP]) {
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
status |= DO_SETLINK_NOTIFY;
}
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
* requested.
*/
if (ifm->ifi_index > 0 && ifname[0]) {
err = dev_change_name(dev, ifname);
if (err < 0)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_IFALIAS]) {
err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]),
nla_len(tb[IFLA_IFALIAS]));
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_BROADCAST]) {
nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
}
if (ifm->ifi_flags || ifm->ifi_change) {
err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
if (err < 0)
goto errout;
}
if (tb[IFLA_MASTER]) {
err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_TXQLEN]) {
unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
unsigned int orig_len = dev->tx_queue_len;
if (dev->tx_queue_len ^ value) {
dev->tx_queue_len = value;
err = call_netdevice_notifiers(
NETDEV_CHANGE_TX_QUEUE_LEN, dev);
err = notifier_to_errno(err);
if (err) {
dev->tx_queue_len = orig_len;
goto errout;
}
status |= DO_SETLINK_MODIFIED;
}
}
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE]) {
unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
write_lock_bh(&dev_base_lock);
if (dev->link_mode ^ value)
status |= DO_SETLINK_NOTIFY;
dev->link_mode = value;
write_unlock_bh(&dev_base_lock);
}
if (tb[IFLA_VFINFO_LIST]) {
struct nlattr *vfinfo[IFLA_VF_MAX + 1];
struct nlattr *attr;
int rem;
nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
if (nla_type(attr) != IFLA_VF_INFO ||
nla_len(attr) < NLA_HDRLEN) {
err = -EINVAL;
goto errout;
}
err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
ifla_vf_policy, NULL);
if (err < 0)
goto errout;
err = do_setvfinfo(dev, vfinfo);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
if (tb[IFLA_VF_PORTS]) {
struct nlattr *port[IFLA_PORT_MAX+1];
struct nlattr *attr;
int vf;
int rem;
err = -EOPNOTSUPP;
if (!ops->ndo_set_vf_port)
goto errout;
nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
if (nla_type(attr) != IFLA_VF_PORT ||
nla_len(attr) < NLA_HDRLEN) {
err = -EINVAL;
goto errout;
}
err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
ifla_port_policy, NULL);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
err = -EOPNOTSUPP;
goto errout;
}
vf = nla_get_u32(port[IFLA_PORT_VF]);
err = ops->ndo_set_vf_port(dev, vf, port);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
if (tb[IFLA_PORT_SELF]) {
struct nlattr *port[IFLA_PORT_MAX+1];
err = nla_parse_nested(port, IFLA_PORT_MAX,
tb[IFLA_PORT_SELF], ifla_port_policy,
NULL);
if (err < 0)
goto errout;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_port)
err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_AF_SPEC]) {
struct nlattr *af;
int rem;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
if (!(af_ops = rtnl_af_lookup(nla_type(af))))
BUG();
err = af_ops->set_link_af(dev, af);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
if (tb[IFLA_PROTO_DOWN]) {
err = dev_change_proto_down(dev,
nla_get_u8(tb[IFLA_PROTO_DOWN]));
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_XDP]) {
struct nlattr *xdp[IFLA_XDP_MAX + 1];
u32 xdp_flags = 0;
err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
ifla_xdp_policy, NULL);
if (err < 0)
goto errout;
if (xdp[IFLA_XDP_ATTACHED] || xdp[IFLA_XDP_PROG_ID]) {
err = -EINVAL;
goto errout;
}
if (xdp[IFLA_XDP_FLAGS]) {
xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
if (xdp_flags & ~XDP_FLAGS_MASK) {
err = -EINVAL;
goto errout;
}
if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1) {
err = -EINVAL;
goto errout;
}
}
if (xdp[IFLA_XDP_FD]) {
err = dev_change_xdp_fd(dev, extack,
nla_get_s32(xdp[IFLA_XDP_FD]),
xdp_flags);
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
}
}
errout:
if (status & DO_SETLINK_MODIFIED) {
if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
netdev_state_change(dev);
if (err < 0)
net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
dev->name);
}
return err;
}
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
int err;
struct nlattr *tb[IFLA_MAX+1];
char ifname[IFNAMSIZ];
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
extack);
if (err < 0)
goto errout;
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
dev = __dev_get_by_name(net, ifname);
else
goto errout;
if (dev == NULL) {
err = -ENODEV;
goto errout;
}
err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0);
errout:
return err;
}
static int rtnl_group_dellink(const struct net *net, int group)
{
struct net_device *dev, *aux;
LIST_HEAD(list_kill);
bool found = false;
if (!group)
return -EPERM;
for_each_netdev(net, dev) {
if (dev->group == group) {
const struct rtnl_link_ops *ops;
found = true;
ops = dev->rtnl_link_ops;
if (!ops || !ops->dellink)
return -EOPNOTSUPP;
}
}
if (!found)
return -ENODEV;
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
const struct rtnl_link_ops *ops;
ops = dev->rtnl_link_ops;
ops->dellink(dev, &list_kill);
}
}
unregister_netdevice_many(&list_kill);
return 0;
}
int rtnl_delete_link(struct net_device *dev)
{
const struct rtnl_link_ops *ops;
LIST_HEAD(list_kill);
ops = dev->rtnl_link_ops;
if (!ops || !ops->dellink)
return -EOPNOTSUPP;
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
dev = __dev_get_by_name(net, ifname);
else if (tb[IFLA_GROUP])
return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP]));
else
return -EINVAL;
if (!dev)
return -ENODEV;
return rtnl_delete_link(dev);
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
{
unsigned int old_flags;
int err;
old_flags = dev->flags;
if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
if (err < 0)
return err;
}
if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
__dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags));
} else {
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
__dev_notify_flags(dev, old_flags, ~0U);
}
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
struct net_device *rtnl_create_link(struct net *net,
const char *ifname, unsigned char name_assign_type,
const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
struct net_device *dev;
unsigned int num_tx_queues = 1;
unsigned int num_rx_queues = 1;
if (tb[IFLA_NUM_TX_QUEUES])
num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
else if (ops->get_num_tx_queues)
num_tx_queues = ops->get_num_tx_queues();
if (tb[IFLA_NUM_RX_QUEUES])
num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]);
else if (ops->get_num_rx_queues)
num_rx_queues = ops->get_num_rx_queues();
if (num_tx_queues < 1 || num_tx_queues > 4096)
return ERR_PTR(-EINVAL);
if (num_rx_queues < 1 || num_rx_queues > 4096)
return ERR_PTR(-EINVAL);
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
ops->setup, num_tx_queues, num_rx_queues);
if (!dev)
return ERR_PTR(-ENOMEM);
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
int err;
err = dev_validate_mtu(dev, mtu);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
}
dev->mtu = mtu;
}
if (tb[IFLA_ADDRESS]) {
memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
nla_len(tb[IFLA_ADDRESS]));
dev->addr_assign_type = NET_ADDR_SET;
}
if (tb[IFLA_BROADCAST])
memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
nla_len(tb[IFLA_BROADCAST]));
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
return dev;
}
EXPORT_SYMBOL(rtnl_create_link);
static int rtnl_group_changelink(const struct sk_buff *skb,
struct net *net, int group,
struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
struct nlattr **tb)
{
struct net_device *dev, *aux;
int err;
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0);
if (err < 0)
return err;
}
}
return 0;
}
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
const struct rtnl_link_ops *m_ops;
struct net_device *dev;
struct net_device *master_dev;
struct ifinfomsg *ifm;
char kind[MODULE_NAME_LEN];
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct nlattr *linkinfo[IFLA_INFO_MAX+1];
unsigned char name_assign_type = NET_NAME_USER;
int err;
#ifdef CONFIG_MODULES
replay:
#endif
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0) {
dev = __dev_get_by_index(net, ifm->ifi_index);
} else if (ifm->ifi_index < 0) {
NL_SET_ERR_MSG(extack, "ifindex can't be negative");
return -EINVAL;
} else {
if (ifname[0])
dev = __dev_get_by_name(net, ifname);
else
dev = NULL;
}
master_dev = NULL;
m_ops = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
if (master_dev)
m_ops = master_dev->rtnl_link_ops;
}
err = validate_linkmsg(dev, tb);
if (err < 0)
return err;
if (tb[IFLA_LINKINFO]) {
err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
tb[IFLA_LINKINFO], ifla_info_policy,
NULL);
if (err < 0)
return err;
} else
memset(linkinfo, 0, sizeof(linkinfo));
if (linkinfo[IFLA_INFO_KIND]) {
nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
ops = rtnl_link_ops_get(kind);
} else {
kind[0] = '\0';
ops = NULL;
}
if (1) {
struct nlattr *attr[RTNL_MAX_TYPE + 1];
struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
struct nlattr **data = NULL;
struct nlattr **slave_data = NULL;
struct net *dest_net, *link_net = NULL;
if (ops) {
if (ops->maxtype > RTNL_MAX_TYPE)
return -EINVAL;
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
err = nla_parse_nested(attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, NULL);
if (err < 0)
return err;
data = attr;
}
if (ops->validate) {
err = ops->validate(tb, data, extack);
if (err < 0)
return err;
}
}
if (m_ops) {
if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
return -EINVAL;
if (m_ops->slave_maxtype &&
linkinfo[IFLA_INFO_SLAVE_DATA]) {
err = nla_parse_nested(slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
m_ops->slave_policy,
NULL);
if (err < 0)
return err;
slave_data = slave_attr;
}
if (m_ops->slave_validate) {
err = m_ops->slave_validate(tb, slave_data,
extack);
if (err < 0)
return err;
}
}
if (dev) {
int status = 0;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
if (linkinfo[IFLA_INFO_DATA]) {
if (!ops || ops != dev->rtnl_link_ops ||
!ops->changelink)
return -EOPNOTSUPP;
err = ops->changelink(dev, tb, data, extack);
if (err < 0)
return err;
status |= DO_SETLINK_NOTIFY;
}
if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
if (!m_ops || !m_ops->slave_changelink)
return -EOPNOTSUPP;
err = m_ops->slave_changelink(master_dev, dev,
tb, slave_data,
extack);
if (err < 0)
return err;
status |= DO_SETLINK_NOTIFY;
}
return do_setlink(skb, dev, ifm, extack, tb, ifname,
status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, extack, tb);
return -ENODEV;
}
if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
if (!ops) {
#ifdef CONFIG_MODULES
if (kind[0]) {
__rtnl_unlock();
request_module("rtnl-link-%s", kind);
rtnl_lock();
ops = rtnl_link_ops_get(kind);
if (ops)
goto replay;
}
#endif
return -EOPNOTSUPP;
}
if (!ops->setup)
return -EOPNOTSUPP;
if (!ifname[0]) {
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
name_assign_type = NET_NAME_ENUM;
}
dest_net = rtnl_link_get_net(net, tb);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
err = -EPERM;
if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN))
goto out;
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
link_net = get_net_ns_by_id(dest_net, id);
if (!link_net) {
err = -EINVAL;
goto out;
}
err = -EPERM;
if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
goto out;
}
dev = rtnl_create_link(link_net ? : dest_net, ifname,
name_assign_type, ops, tb);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out;
}
dev->ifindex = ifm->ifi_index;
if (ops->newlink) {
err = ops->newlink(link_net ? : net, dev, tb, data,
extack);
/* Drivers should call free_netdev() in ->destructor
* and unregister it on failure after registration
* so that device could be finally freed in rtnl_unlock.
*/
if (err < 0) {
/* If device is not registered at all, free it now */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_UNREGISTERED)
free_netdev(dev);
goto out;
}
} else {
err = register_netdevice(dev);
if (err < 0) {
free_netdev(dev);
goto out;
}
}
err = rtnl_configure_link(dev, ifm);
if (err < 0)
goto out_unregister;
if (link_net) {
err = dev_change_net_namespace(dev, dest_net, ifname);
if (err < 0)
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
if (err)
goto out_unregister;
}
out:
if (link_net)
put_net(link_net);
put_net(dest_net);
return err;
out_unregister:
if (ops->newlink) {
LIST_HEAD(list_kill);
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
} else {
unregister_netdevice(dev);
}
goto out;
}
}
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
int err;
u32 ext_filter_mask = 0;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
dev = __dev_get_by_name(net, ifname);
else
return -EINVAL;
if (dev == NULL)
return -ENODEV;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
if (nskb == NULL)
return -ENOBUFS;
err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
return err;
}
static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
int hdrlen;
/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
if (!ext_filter_mask)
return NLMSG_GOODSIZE;
/*
* traverse the list of net devices and compute the minimum
* buffer size based upon the filter mask.
*/
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
if_nlmsg_size(dev,
ext_filter_mask));
}
rcu_read_unlock();
return nlmsg_total_size(min_ifinfo_dump_size);
}
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
if (s_idx == 0)
s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
struct rtnl_link *handlers;
rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET)
continue;
handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
if (!handlers)
continue;
dumpit = READ_ONCE(handlers[type].dumpit);
if (!dumpit)
continue;
if (idx > s_idx) {
memset(&cb->args[0], 0, sizeof(cb->args));
cb->prev_seq = 0;
cb->seq = 0;
}
if (dumpit(skb, cb))
break;
}
cb->family = idx;
return skb->len;
}
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
u32 event, gfp_t flags)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
size_t if_info_size;
skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
if (skb == NULL)
goto errout;
err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
return skb;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return NULL;
}
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
{
struct net *net = dev_net(dev);
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
}
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
gfp_t flags)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
}
EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
int type, unsigned int flags,
int nlflags, u16 ndm_state)
{
struct nlmsghdr *nlh;
struct ndmsg *ndm;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags);
if (!nlh)
return -EMSGSIZE;
ndm = nlmsg_data(nlh);
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = flags;
ndm->ndm_type = 0;
ndm->ndm_ifindex = dev->ifindex;
ndm->ndm_state = ndm_state;
if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr))
goto nla_put_failure;
if (vid)
if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev)
{
return NLMSG_ALIGN(sizeof(struct ndmsg)) +
nla_total_size(dev->addr_len) + /* NDA_LLADDR */
nla_total_size(sizeof(u16)) + /* NDA_VLAN */
0;
}
static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
u16 ndm_state)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC);
if (!skb)
goto errout;
err = nlmsg_populate_fdb_fill(skb, dev, addr, vid,
0, 0, type, NTF_SELF, 0, ndm_state);
if (err < 0) {
kfree_skb(skb);
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
/**
* ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
*/
int ndo_dflt_fdb_add(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
u16 flags)
{
int err = -EINVAL;
/* If aging addresses are supported device will need to
* implement its own handler for this.
*/
if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
pr_info("%s: FDB only supports static addresses\n", dev->name);
return err;
}
if (vid) {
pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
return err;
}
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
err = dev_uc_add_excl(dev, addr);
else if (is_multicast_ether_addr(addr))
err = dev_mc_add_excl(dev, addr);
/* Only return duplicate errors if NLM_F_EXCL is set */
if (err == -EEXIST && !(flags & NLM_F_EXCL))
err = 0;
return err;
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
}
}
*p_vid = vid;
return 0;
}
static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
u8 *addr;
u16 vid;
int err;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
return -EINVAL;
}
if (dev->type != ARPHRD_ETHER) {
NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid);
if (err)
return err;
err = -EOPNOTSUPP;
/* Support fdb on master device the net/bridge default case */
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
(dev->priv_flags & IFF_BRIDGE_PORT)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops = br_dev->netdev_ops;
err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
nlh->nlmsg_flags);
if (err)
goto out;
else
ndm->ndm_flags &= ~NTF_MASTER;
}
/* Embedded bridge, macvlan, and any other device support */
if ((ndm->ndm_flags & NTF_SELF)) {
if (dev->netdev_ops->ndo_fdb_add)
err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
vid,
nlh->nlmsg_flags);
else
err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
nlh->nlmsg_flags);
if (!err) {
rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
out:
return err;
}
/**
* ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
*/
int ndo_dflt_fdb_del(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid)
{
int err = -EINVAL;
/* If aging addresses are supported device will need to
* implement its own handler for this.
*/
if (!(ndm->ndm_state & NUD_PERMANENT)) {
pr_info("%s: FDB only supports static addresses\n", dev->name);
return err;
}
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
err = dev_uc_del(dev, addr);
else if (is_multicast_ether_addr(addr))
err = dev_mc_del(dev, addr);
return err;
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
int err = -EINVAL;
__u8 *addr;
u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
return -EINVAL;
}
if (dev->type != ARPHRD_ETHER) {
NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid);
if (err)
return err;
err = -EOPNOTSUPP;
/* Support fdb on master device the net/bridge default case */
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
(dev->priv_flags & IFF_BRIDGE_PORT)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops = br_dev->netdev_ops;
if (ops->ndo_fdb_del)
err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
if (err)
goto out;
else
ndm->ndm_flags &= ~NTF_MASTER;
}
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
if (dev->netdev_ops->ndo_fdb_del)
err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
vid);
else
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
if (!err) {
rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
out:
return err;
}
static int nlmsg_populate_fdb(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
int *idx,
struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha;
int err;
u32 portid, seq;
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
if (*idx < cb->args[2])
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
portid, seq,
RTM_NEWNEIGH, NTF_SELF,
NLM_F_MULTI, NUD_PERMANENT);
if (err < 0)
return err;
skip:
*idx += 1;
}
return 0;
}
/**
* ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table.
* @nlh: netlink message header
* @dev: netdevice
*
* Default netdevice operation to dump the existing unicast address list.
* Returns number of addresses from list put in skb.
*/
int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev,
int *idx)
{
int err;
if (dev->type != ARPHRD_ETHER)
return -EINVAL;
netif_addr_lock_bh(dev);
err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
if (err)
goto out;
err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
out:
netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(ndo_dflt_fdb_dump);
static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
struct nlattr *tb[IFLA_MAX+1];
struct net_device *br_dev = NULL;
const struct net_device_ops *ops = NULL;
const struct net_device_ops *cops = NULL;
struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
struct net *net = sock_net(skb->sk);
struct hlist_head *head;
int brport_idx = 0;
int br_idx = 0;
int h, s_h;
int idx = 0, s_idx;
int err = 0;
int fidx = 0;
/* A hack to preserve kernel<->userspace interface.
* Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
* However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.
* So, check for ndmsg with an optional u32 attribute (not used here).
* Fortunately these sizes don't conflict with the size of ifinfomsg
* with an optional attribute.
*/
if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
(nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
nla_attr_size(sizeof(u32)))) {
err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
IFLA_MAX, ifla_policy, NULL);
if (err < 0) {
return -EINVAL;
} else if (err == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
brport_idx = ifm->ifi_index;
}
if (br_idx) {
br_dev = __dev_get_by_index(net, br_idx);
if (!br_dev)
return -ENODEV;
ops = br_dev->netdev_ops;
}
s_h = cb->args[0];
s_idx = cb->args[1];
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (brport_idx && (dev->ifindex != brport_idx))
continue;
if (!br_idx) { /* user did not specify a specific bridge */
if (dev->priv_flags & IFF_BRIDGE_PORT) {
br_dev = netdev_master_upper_dev_get(dev);
cops = br_dev->netdev_ops;
}
} else {
if (dev != br_dev &&
!(dev->priv_flags & IFF_BRIDGE_PORT))
continue;
if (br_dev != netdev_master_upper_dev_get(dev) &&
!(dev->priv_flags & IFF_EBRIDGE))
continue;
cops = ops;
}
if (idx < s_idx)
goto cont;
if (dev->priv_flags & IFF_BRIDGE_PORT) {
if (cops && cops->ndo_fdb_dump) {
err = cops->ndo_fdb_dump(skb, cb,
br_dev, dev,
&fidx);
if (err == -EMSGSIZE)
goto out;
}
}
if (dev->netdev_ops->ndo_fdb_dump)
err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
dev, NULL,
&fidx);
else
err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
&fidx);
if (err == -EMSGSIZE)
goto out;
cops = NULL;
/* reset fdb offset to 0 for rest of the interfaces */
cb->args[2] = 0;
fidx = 0;
cont:
idx++;
}
}
out:
cb->args[0] = h;
cb->args[1] = idx;
cb->args[2] = fidx;
return skb->len;
}
static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
unsigned int attrnum, unsigned int flag)
{
if (mask & flag)
return nla_put_u8(skb, attrnum, !!(flags & flag));
return 0;
}
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode,
u32 flags, u32 mask, int nlflags,
u32 filter_mask,
int (*vlan_fill)(struct sk_buff *skb,
struct net_device *dev,
u32 filter_mask))
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
struct nlattr *br_afspec;
struct nlattr *protinfo;
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
int err = 0;
nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
if (nlh == NULL)
return -EMSGSIZE;
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_BRIDGE;
ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type;
ifm->ifi_index = dev->ifindex;
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = 0;
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
(br_dev &&
nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
(dev->ifindex != dev_get_iflink(dev) &&
nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
goto nla_put_failure;
br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
if (!br_afspec)
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF)) {
nla_nest_cancel(skb, br_afspec);
goto nla_put_failure;
}
if (mode != BRIDGE_MODE_UNDEF) {
if (nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
nla_nest_cancel(skb, br_afspec);
goto nla_put_failure;
}
}
if (vlan_fill) {
err = vlan_fill(skb, dev, filter_mask);
if (err) {
nla_nest_cancel(skb, br_afspec);
goto nla_put_failure;
}
}
nla_nest_end(skb, br_afspec);
protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
if (!protinfo)
goto nla_put_failure;
if (brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_MODE, BR_HAIRPIN_MODE) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_GUARD, BR_BPDU_GUARD) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_FAST_LEAVE,
BR_MULTICAST_FAST_LEAVE) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_LEARNING, BR_LEARNING) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_LEARNING_SYNC, BR_LEARNING_SYNC) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) ||
brport_nla_put_flag(skb, flags, mask,
IFLA_BRPORT_PROXYARP, BR_PROXYARP)) {
nla_nest_cancel(skb, protinfo);
goto nla_put_failure;
}
nla_nest_end(skb, protinfo);
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return err ? err : -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink);
static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
int idx = 0;
u32 portid = NETLINK_CB(cb->skb).portid;
u32 seq = cb->nlh->nlmsg_seq;
u32 filter_mask = 0;
int err;
if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) {
struct nlattr *extfilt;
extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
IFLA_EXT_MASK);
if (extfilt) {
if (nla_len(extfilt) < sizeof(filter_mask))
return -EINVAL;
filter_mask = nla_get_u32(extfilt);
}
}
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
const struct net_device_ops *ops = dev->netdev_ops;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
if (idx >= cb->args[0]) {
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
if (err < 0 && err != -EOPNOTSUPP) {
if (likely(skb->len))
break;
goto out_err;
}
}
idx++;
}
if (ops->ndo_bridge_getlink) {
if (idx >= cb->args[0]) {
err = ops->ndo_bridge_getlink(skb, portid,
seq, dev,
filter_mask,
NLM_F_MULTI);
if (err < 0 && err != -EOPNOTSUPP) {
if (likely(skb->len))
break;
goto out_err;
}
}
idx++;
}
}
err = skb->len;
out_err:
rcu_read_unlock();
cb->args[0] = idx;
return err;
}
static inline size_t bridge_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(sizeof(u32)) /* IFLA_MASTER */
+ nla_total_size(sizeof(u32)) /* IFLA_MTU */
+ nla_total_size(sizeof(u32)) /* IFLA_LINK */
+ nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */
+ nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */
+ nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */
+ nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */
+ nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
}
static int rtnl_bridge_notify(struct net_device *dev)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -EOPNOTSUPP;
if (!dev->netdev_ops->ndo_bridge_getlink)
return 0;
skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
if (!skb) {
err = -ENOMEM;
goto errout;
}
err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0);
if (err < 0)
goto errout;
/* Notification info is only filled for bridge ports, not the bridge
* device itself. Therefore, a zero notification length is valid and
* should not result in an error.
*/
if (!skb->len)
goto errout;
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
errout:
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
if (err)
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return err;
}
static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_family != AF_BRIDGE)
return -EPFNOSUPPORT;
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
return -ENODEV;
}
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
break;
}
}
}
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
err = -EOPNOTSUPP;
goto out;
}
err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
if (err)
goto out;
flags &= ~BRIDGE_FLAGS_MASTER;
}
if ((flags & BRIDGE_FLAGS_SELF)) {
if (!dev->netdev_ops->ndo_bridge_setlink)
err = -EOPNOTSUPP;
else
err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
flags);
if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
/* Generate event to notify upper layer of bridge
* change
*/
err = rtnl_bridge_notify(dev);
}
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
out:
return err;
}
static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_family != AF_BRIDGE)
return -EPFNOSUPPORT;
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
return -ENODEV;
}
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
break;
}
}
}
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
err = -EOPNOTSUPP;
goto out;
}
err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags);
if (err)
goto out;
flags &= ~BRIDGE_FLAGS_MASTER;
}
if ((flags & BRIDGE_FLAGS_SELF)) {
if (!dev->netdev_ops->ndo_bridge_dellink)
err = -EOPNOTSUPP;
else
err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh,
flags);
if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
/* Generate event to notify upper layer of bridge
* change
*/
err = rtnl_bridge_notify(dev);
}
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
out:
return err;
}
static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
{
return (mask & IFLA_STATS_FILTER_BIT(attrid)) &&
(!idxattr || idxattr == attrid);
}
#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
static int rtnl_get_offload_stats_attr_size(int attr_id)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
return sizeof(struct rtnl_link_stats64);
}
return 0;
}
static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
int *prividx)
{
struct nlattr *attr = NULL;
int attr_id, size;
void *attr_data;
int err;
if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
dev->netdev_ops->ndo_get_offload_stats))
return -ENODATA;
for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
if (attr_id < *prividx)
continue;
size = rtnl_get_offload_stats_attr_size(attr_id);
if (!size)
continue;
if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
continue;
attr = nla_reserve_64bit(skb, attr_id, size,
IFLA_OFFLOAD_XSTATS_UNSPEC);
if (!attr)
goto nla_put_failure;
attr_data = nla_data(attr);
memset(attr_data, 0, size);
err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
attr_data);
if (err)
goto get_offload_stats_failure;
}
if (!attr)
return -ENODATA;
*prividx = 0;
return 0;
nla_put_failure:
err = -EMSGSIZE;
get_offload_stats_failure:
*prividx = attr_id;
return err;
}
static int rtnl_get_offload_stats_size(const struct net_device *dev)
{
int nla_size = 0;
int attr_id;
int size;
if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
dev->netdev_ops->ndo_get_offload_stats))
return 0;
for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
continue;
size = rtnl_get_offload_stats_attr_size(attr_id);
nla_size += nla_total_size_64bit(size);
}
if (nla_size != 0)
nla_size += nla_total_size(0);
return nla_size;
}
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, unsigned int filter_mask,
int *idxattr, int *prividx)
{
struct if_stats_msg *ifsm;
struct nlmsghdr *nlh;
struct nlattr *attr;
int s_prividx = *prividx;
int err;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
if (!nlh)
return -EMSGSIZE;
ifsm = nlmsg_data(nlh);
ifsm->family = PF_UNSPEC;
ifsm->pad1 = 0;
ifsm->pad2 = 0;
ifsm->ifindex = dev->ifindex;
ifsm->filter_mask = filter_mask;
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
struct rtnl_link_stats64 *sp;
attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
sizeof(struct rtnl_link_stats64),
IFLA_STATS_UNSPEC);
if (!attr)
goto nla_put_failure;
sp = nla_data(attr);
dev_get_stats(dev, sp);
}
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
if (ops && ops->fill_linkxstats) {
*idxattr = IFLA_STATS_LINK_XSTATS;
attr = nla_nest_start(skb,
IFLA_STATS_LINK_XSTATS);
if (!attr)
goto nla_put_failure;
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
if (err)
goto nla_put_failure;
*idxattr = 0;
}
}
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE,
*idxattr)) {
const struct rtnl_link_ops *ops = NULL;
const struct net_device *master;
master = netdev_master_upper_dev_get(dev);
if (master)
ops = master->rtnl_link_ops;
if (ops && ops->fill_linkxstats) {
*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
attr = nla_nest_start(skb,
IFLA_STATS_LINK_XSTATS_SLAVE);
if (!attr)
goto nla_put_failure;
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
if (err)
goto nla_put_failure;
*idxattr = 0;
}
}
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
*idxattr)) {
*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS);
if (!attr)
goto nla_put_failure;
err = rtnl_get_offload_stats(skb, dev, prividx);
if (err == -ENODATA)
nla_nest_cancel(skb, attr);
else
nla_nest_end(skb, attr);
if (err && err != -ENODATA)
goto nla_put_failure;
*idxattr = 0;
}
if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) {
struct rtnl_af_ops *af_ops;
*idxattr = IFLA_STATS_AF_SPEC;
attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC);
if (!attr)
goto nla_put_failure;
list_for_each_entry(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
if (!af)
goto nla_put_failure;
err = af_ops->fill_stats_af(skb, dev);
if (err == -ENODATA)
nla_nest_cancel(skb, af);
else if (err < 0)
goto nla_put_failure;
nla_nest_end(skb, af);
}
}
nla_nest_end(skb, attr);
*idxattr = 0;
}
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
/* not a multi message or no progress mean a real error */
if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
nlmsg_cancel(skb, nlh);
else
nlmsg_end(skb, nlh);
return -EMSGSIZE;
}
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
int attr = IFLA_STATS_LINK_XSTATS;
if (ops && ops->get_linkxstats_size) {
size += nla_total_size(ops->get_linkxstats_size(dev,
attr));
/* for IFLA_STATS_LINK_XSTATS */
size += nla_total_size(0);
}
}
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) {
struct net_device *_dev = (struct net_device *)dev;
const struct rtnl_link_ops *ops = NULL;
const struct net_device *master;
/* netdev_master_upper_dev_get can't take const */
master = netdev_master_upper_dev_get(_dev);
if (master)
ops = master->rtnl_link_ops;
if (ops && ops->get_linkxstats_size) {
int attr = IFLA_STATS_LINK_XSTATS_SLAVE;
size += nla_total_size(ops->get_linkxstats_size(dev,
attr));
/* for IFLA_STATS_LINK_XSTATS_SLAVE */
size += nla_total_size(0);
}
}
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
size += rtnl_get_offload_stats_size(dev);
if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
struct rtnl_af_ops *af_ops;
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
list_for_each_entry(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
/* for AF_* */
size += nla_total_size(0);
}
}
}
return size;
}
static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
int idxattr = 0, prividx = 0;
struct if_stats_msg *ifsm;
struct sk_buff *nskb;
u32 filter_mask;
int err;
if (nlmsg_len(nlh) < sizeof(*ifsm))
return -EINVAL;
ifsm = nlmsg_data(nlh);
if (ifsm->ifindex > 0)
dev = __dev_get_by_index(net, ifsm->ifindex);
else
return -EINVAL;
if (!dev)
return -ENODEV;
filter_mask = ifsm->filter_mask;
if (!filter_mask)
return -EINVAL;
nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
if (!nskb)
return -ENOBUFS;
err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
0, filter_mask, &idxattr, &prividx);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else {
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
}
return err;
}
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
struct hlist_head *head;
struct net_device *dev;
u32 filter_mask = 0;
int idx = 0;
s_h = cb->args[0];
s_idx = cb->args[1];
s_idxattr = cb->args[2];
s_prividx = cb->args[3];
cb->seq = net->dev_base_seq;
if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
return -EINVAL;
ifsm = nlmsg_data(cb->nlh);
filter_mask = ifsm->filter_mask;
if (!filter_mask)
return -EINVAL;
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags, filter_mask,
&s_idxattr, &s_prividx);
/* If we ran out of room on the first message,
* we're in trouble
*/
WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
if (err < 0)
goto out;
s_prividx = 0;
s_idxattr = 0;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
out:
cb->args[3] = s_prividx;
cb->args[2] = s_idxattr;
cb->args[1] = idx;
cb->args[0] = h;
return skb->len;
}
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct rtnl_link *handlers;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
int kind;
int family;
int type;
type = nlh->nlmsg_type;
if (type > RTM_MAX)
return -EOPNOTSUPP;
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
return 0;
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
kind = type&3;
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (family >= ARRAY_SIZE(rtnl_msg_handlers))
family = PF_UNSPEC;
rcu_read_lock();
handlers = rcu_dereference(rtnl_msg_handlers[family]);
if (!handlers) {
family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[family]);
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0;
dumpit = READ_ONCE(handlers[type].dumpit);
if (!dumpit) {
family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
if (!handlers)
goto err_unlock;
dumpit = READ_ONCE(handlers[type].dumpit);
if (!dumpit)
goto err_unlock;
}
refcount_inc(&rtnl_msg_handlers_ref[family]);
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
rcu_read_unlock();
rtnl = net->rtnl;
{
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
}
refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
doit = READ_ONCE(handlers[type].doit);
if (!doit) {
family = PF_UNSPEC;
handlers = rcu_dereference(rtnl_msg_handlers[family]);
}
flags = READ_ONCE(handlers[type].flags);
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
refcount_inc(&rtnl_msg_handlers_ref[family]);
doit = READ_ONCE(handlers[type].doit);
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
rcu_read_unlock();
rtnl_lock();
handlers = rtnl_dereference(rtnl_msg_handlers[family]);
if (handlers) {
doit = READ_ONCE(handlers[type].doit);
if (doit)
err = doit(skb, nlh, extack);
}
rtnl_unlock();
return err;
err_unlock:
rcu_read_unlock();
return -EOPNOTSUPP;
}
static void rtnetlink_rcv(struct sk_buff *skb)
{
netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
}
static int rtnetlink_bind(struct net *net, int group)
{
switch (group) {
case RTNLGRP_IPV4_MROUTE_R:
case RTNLGRP_IPV6_MROUTE_R:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
break;
}
return 0;
}
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
case NETDEV_REBOOT:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
case NETDEV_BONDING_FAILOVER:
case NETDEV_POST_TYPE_CHANGE:
case NETDEV_NOTIFY_PEERS:
case NETDEV_CHANGEUPPER:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
GFP_KERNEL);
break;
default:
break;
}
return NOTIFY_DONE;
}
static struct notifier_block rtnetlink_dev_notifier = {
.notifier_call = rtnetlink_event,
};
static int __net_init rtnetlink_net_init(struct net *net)
{
struct sock *sk;
struct netlink_kernel_cfg cfg = {
.groups = RTNLGRP_MAX,
.input = rtnetlink_rcv,
.cb_mutex = &rtnl_mutex,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = rtnetlink_bind,
};
sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
if (!sk)
return -ENOMEM;
net->rtnl = sk;
return 0;
}
static void __net_exit rtnetlink_net_exit(struct net *net)
{
netlink_kernel_release(net->rtnl);
net->rtnl = NULL;
}
static struct pernet_operations rtnetlink_net_ops = {
.init = rtnetlink_net_init,
.exit = rtnetlink_net_exit,
};
void __init rtnetlink_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
refcount_set(&rtnl_msg_handlers_ref[i], 1);
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
rtnl_dump_ifinfo, 0);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
}