Hi, ok, at least some progress has happened:
- Replaced the device-specific oper_state method with a NETIF_F_STACKED flag that selects between IF_OPER_DOWN and IF_OPER_LOWERLAYERDOWN
- Added sysfs support to read operstate
- Completed netlink support (Jamal, Thomas, can you verify the code?)
- Added the netif_oper_up() query function
- Treat IF_OPER_UNKNOWN as equivalent to IF_OPER_UP in some cases, for compatibility with devices that do not set carrier state
- Adapted the VLAN driver
- Verified operation with starfire, vlan and loopback. IF_OPER_UNKNOWN won't propagate to upper layers when stacking devices.

Todo:
- Complete sysfs
- Test netlink userspace interaction
- Docs

Stefan
diff -X dontdiff -ur linux-2.6.14/include/linux/if.h linux-2.6.14-rfc2863/include/linux/if.h --- linux-2.6.14/include/linux/if.h 2005-11-02 11:07:32.000000000 +0100 +++ linux-2.6.14-rfc2863/include/linux/if.h 2005-11-18 20:14:25.000000000 +0100 @@ -33,7 +33,7 @@ #define IFF_LOOPBACK 0x8 /* is a loopback net */ #define IFF_POINTOPOINT 0x10 /* interface is has p-p link */ #define IFF_NOTRAILERS 0x20 /* avoid use of trailers */ -#define IFF_RUNNING 0x40 /* interface running and carrier ok */ +#define IFF_RUNNING 0x40 /* interface RFC2863 OPER_UP */ #define IFF_NOARP 0x80 /* no ARP protocol */ #define IFF_PROMISC 0x100 /* receive all packets */ #define IFF_ALLMULTI 0x200 /* receive all multicast packets*/ @@ -43,12 +43,16 @@ #define IFF_MULTICAST 0x1000 /* Supports multicast */ -#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING) - #define IFF_PORTSEL 0x2000 /* can set media type */ #define IFF_AUTOMEDIA 0x4000 /* auto media select active */ #define IFF_DYNAMIC 0x8000 /* dialup device with changing addresses*/ +#define IFF_CARRIER 0x10000 /* driver signals carrier */ +#define IFF_DORMANT 0x20000 /* driver signals dormant */ + +#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|\ + IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_CARRIER|IFF_DORMANT) + /* Private (from user) interface flags (netdevice->priv_flags). */ #define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */ #define IFF_EBRIDGE 0x2 /* Ethernet bridging device. */ @@ -80,6 +84,22 @@ #define IF_PROTO_FR_ETH_PVC 0x200B #define IF_PROTO_RAW 0x200C /* RAW Socket */ +/* RFC 2863 operational status */ +enum { + IF_OPER_UNKNOWN, + IF_OPER_NOTPRESENT, + IF_OPER_DOWN, + IF_OPER_LOWERLAYERDOWN, + IF_OPER_TESTING, + IF_OPER_DORMANT, + IF_OPER_UP, +}; + +/* link modes */ +enum { + IF_LINK_MODE_DEFAULT, + IF_LINK_MODE_DORMANT, /* limit upward transition to dormant */ +}; /* * Device mapping structure. 
diff -X dontdiff -ur linux-2.6.14/include/linux/if_vlan.h linux-2.6.14-rfc2863/include/linux/if_vlan.h --- linux-2.6.14/include/linux/if_vlan.h 2005-11-02 11:08:10.000000000 +0100 +++ linux-2.6.14-rfc2863/include/linux/if_vlan.h 2005-11-28 13:50:07.000000000 +0100 @@ -64,6 +64,8 @@ /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(void __user *)); +extern void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev); + #define VLAN_NAME "vlan" /* if this changes, algorithm will have to be reworked because this diff -X dontdiff -ur linux-2.6.14/include/linux/netdevice.h linux-2.6.14-rfc2863/include/linux/netdevice.h --- linux-2.6.14/include/linux/netdevice.h 2005-11-02 11:08:10.000000000 +0100 +++ linux-2.6.14-rfc2863/include/linux/netdevice.h 2005-11-28 13:33:35.000000000 +0100 @@ -230,7 +230,8 @@ __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER, __LINK_STATE_RX_SCHED, - __LINK_STATE_LINKWATCH_PENDING + __LINK_STATE_LINKWATCH_PENDING, + __LINK_STATE_DORMANT, }; @@ -308,6 +309,7 @@ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_STACKED 8192 /* Interface is stacked */ struct net_device *next_sched; @@ -334,11 +336,14 @@ */ - unsigned short flags; /* interface flags (a la BSD) */ + unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* How much padding added by alloc_netdev() */ + unsigned char operstate; /* RFC2863 operstate */ + unsigned char link_mode; /* mapping policy to operstate */ + unsigned mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ @@ -712,6 +717,10 @@ /* Carrier loss detection, dial on demand. 
The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. + * + * The name carrier is inappropriate, these functions should really be + * called netif_lowerlayer_*() because they represent the state of any + * kind of lower layer not just hardware media. */ extern void linkwatch_fire_event(struct net_device *dev); @@ -727,6 +736,29 @@ extern void netif_carrier_off(struct net_device *dev); +static inline void netif_dormant_on(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) + linkwatch_fire_event(dev); +} + +static inline void netif_dormant_off(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) + linkwatch_fire_event(dev); +} + +static inline int netif_dormant(const struct net_device *dev) +{ + return test_bit(__LINK_STATE_DORMANT, &dev->state); +} + + +static inline int netif_oper_up(const struct net_device *dev) { + return (dev->operstate == IF_OPER_UP || + dev->operstate == IF_OPER_UNKNOWN /* backward compat */); +} + /* Hot-plugging. 
*/ static inline int netif_device_present(struct net_device *dev) { diff -X dontdiff -ur linux-2.6.14/include/linux/rtnetlink.h linux-2.6.14-rfc2863/include/linux/rtnetlink.h --- linux-2.6.14/include/linux/rtnetlink.h 2005-11-02 11:08:11.000000000 +0100 +++ linux-2.6.14-rfc2863/include/linux/rtnetlink.h 2005-11-18 20:14:05.000000000 +0100 @@ -733,6 +733,8 @@ #define IFLA_MAP IFLA_MAP IFLA_WEIGHT, #define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, __IFLA_MAX }; diff -X dontdiff -ur linux-2.6.14/net/8021q/vlan.c linux-2.6.14-rfc2863/net/8021q/vlan.c --- linux-2.6.14/net/8021q/vlan.c 2005-11-02 11:07:35.000000000 +0100 +++ linux-2.6.14-rfc2863/net/8021q/vlan.c 2005-11-28 13:53:27.000000000 +0100 @@ -68,7 +68,7 @@ /* Bits of netdev state that are propagated from real device to virtual */ #define VLAN_LINK_STATE_MASK \ - ((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)) + ((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)|(1<<__LINK_STATE_DORMANT)) /* End of global variables definitions. */ @@ -325,6 +325,8 @@ */ new_dev->get_stats = vlan_dev_get_stats; + new_dev->features |= NETIF_F_STACKED; + /* Make this thing known as a VLAN device */ new_dev->priv_flags |= IFF_802_1Q_VLAN; @@ -343,6 +345,26 @@ new_dev->do_ioctl = vlan_dev_ioctl; } +void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) +{ + if (netif_carrier_ok(dev)) { + if (!netif_carrier_ok(vlandev)) + netif_carrier_on(vlandev); + } else { + if (netif_carrier_ok(vlandev)) + netif_carrier_off(vlandev); + } + + /* Have to respect userspace enforced dormant state + * of real device, also must allow supplicant running + * on VLAN device + */ + if (dev->operstate == IF_OPER_DORMANT) + netif_dormant_on(vlandev); + else + netif_dormant_off(vlandev); +} + /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. 
@@ -578,13 +600,7 @@ if (!vlandev) continue; - if (netif_carrier_ok(dev)) { - if (!netif_carrier_ok(vlandev)) - netif_carrier_on(vlandev); - } else { - if (netif_carrier_ok(vlandev)) - netif_carrier_off(vlandev); - } + vlan_transfer_operstate(dev, vlandev); if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) { vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK) diff -X dontdiff -ur linux-2.6.14/net/8021q/vlan_dev.c linux-2.6.14-rfc2863/net/8021q/vlan_dev.c --- linux-2.6.14/net/8021q/vlan_dev.c 2005-11-02 11:08:11.000000000 +0100 +++ linux-2.6.14-rfc2863/net/8021q/vlan_dev.c 2005-11-28 13:56:33.000000000 +0100 @@ -786,6 +786,8 @@ { if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP)) return -ENETDOWN; + vlan_transfer_operstate(VLAN_DEV_INFO(dev)->real_dev, dev); + linkwatch_fire_event(dev); /* force event to setup operstate */ return 0; } diff -X dontdiff -ur linux-2.6.14/net/core/dev.c linux-2.6.14-rfc2863/net/core/dev.c --- linux-2.6.14/net/core/dev.c 2005-11-06 17:35:22.000000000 +0100 +++ linux-2.6.14-rfc2863/net/core/dev.c 2005-11-28 13:33:58.000000000 +0100 @@ -2141,12 +2141,20 @@ flags = (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI | - IFF_RUNNING)) | + IFF_RUNNING | + IFF_CARRIER | + IFF_DORMANT)) | (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); - if (netif_running(dev) && netif_carrier_ok(dev)) - flags |= IFF_RUNNING; + if (netif_running(dev)) { + if (netif_oper_up(dev)) + flags |= IFF_RUNNING; + if (netif_carrier_ok(dev)) + flags |= IFF_CARRIER; + if (netif_dormant(dev)) + flags |= IFF_DORMANT; + } return flags; } diff -X dontdiff -ur linux-2.6.14/net/core/link_watch.c linux-2.6.14-rfc2863/net/core/link_watch.c --- linux-2.6.14/net/core/link_watch.c 2005-06-17 21:48:29.000000000 +0200 +++ linux-2.6.14-rfc2863/net/core/link_watch.c 2005-11-27 15:09:05.000000000 +0100 @@ -49,6 +49,34 @@ /* Avoid kmalloc() for most systems */ static struct lw_event singleevent; +static inline unsigned char default_operstate(const struct net_device *dev) { + if 
(!netif_carrier_ok(dev)) + return dev->features&NETIF_F_STACKED?IF_OPER_LOWERLAYERDOWN:IF_OPER_DOWN; + if (netif_dormant(dev)) return IF_OPER_DORMANT; + return IF_OPER_UP; +} + + +static void rfc2863_policy(struct net_device *dev) { + unsigned char operstate = default_operstate(dev); + + if (operstate == dev->operstate) return; + + switch(dev->link_mode) { + case IF_LINK_MODE_DORMANT: + if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; + break; + case IF_LINK_MODE_DEFAULT: + default: + break; + } + + write_lock_bh(&dev_base_lock); + dev->operstate = operstate; + write_unlock_bh(&dev_base_lock); +} + + /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { @@ -81,6 +109,7 @@ } else dev_deactivate(dev); + rfc2863_policy(dev); netdev_state_change(dev); } diff -X dontdiff -ur linux-2.6.14/net/core/net-sysfs.c linux-2.6.14-rfc2863/net/core/net-sysfs.c --- linux-2.6.14/net/core/net-sysfs.c 2005-06-17 21:48:29.000000000 +0200 +++ linux-2.6.14-rfc2863/net/core/net-sysfs.c 2005-11-28 12:57:45.000000000 +0100 @@ -136,9 +136,34 @@ return -EINVAL; } +static const char *operstates[] = { + "unknown", + NULL, /* notpresent, currently unused */ + "down", + "lowerlayerdown", + NULL, /* testing, currently unused */ + "dormant", + "up" +}; + +static ssize_t show_operstate(struct class_device *dev, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + unsigned char operstate; + + read_lock(&dev_base_lock); + operstate = netdev->operstate; + if (!netif_running(netdev)) operstate = IF_OPER_DOWN; + read_unlock(&dev_base_lock); + + if (operstate >= sizeof(operstates)) return -EINVAL; /* should not happen */ + return sprintf(buf, "%s\n", operstates[operstate]); +} + static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL); static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); +static CLASS_DEVICE_ATTR(operstate, S_IRUGO, show_operstate, NULL); /* read-write 
attributes */ NETDEVICE_SHOW(mtu, fmt_dec); @@ -215,6 +240,7 @@ &class_device_attr_address, &class_device_attr_broadcast, &class_device_attr_carrier, + &class_device_attr_operstate, NULL }; diff -X dontdiff -ur linux-2.6.14/net/core/rtnetlink.c linux-2.6.14-rfc2863/net/core/rtnetlink.c --- linux-2.6.14/net/core/rtnetlink.c 2005-11-02 11:08:12.000000000 +0100 +++ linux-2.6.14-rfc2863/net/core/rtnetlink.c 2005-11-28 13:33:31.000000000 +0100 @@ -178,6 +178,31 @@ } +static void set_operstate(struct net_device *dev, unsigned char transition) { + unsigned char operstate = dev->operstate; + ASSERT_RTNL(); + + switch(transition) { + case IF_OPER_UP: + if (operstate == IF_OPER_DORMANT || + operstate == IF_OPER_UNKNOWN) + operstate = IF_OPER_UP; + break; + case IF_OPER_DORMANT: + if (operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN) + operstate = IF_OPER_DORMANT; + break; + } + + if (dev->operstate != operstate) { + write_lock_bh(&dev_base_lock); + dev->operstate = operstate; + write_unlock_bh(&dev_base_lock); + netdev_state_change(dev); + } +} + static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) @@ -208,6 +233,13 @@ } if (1) { + u8 operstate = dev->operstate; + u8 link_mode = dev->link_mode; + RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); + RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode); + } + + if (1) { struct rtnl_link_ifmap map = { .mem_start = dev->mem_start, .mem_end = dev->mem_end, @@ -398,6 +430,22 @@ dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); } + if (ida[IFLA_OPERSTATE - 1]) { + if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) + goto out; + + set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); + } + + if (ida[IFLA_LINKMODE - 1]) { + if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) + goto out; + + write_lock_bh(&dev_base_lock); + dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); + 
write_unlock_bh(&dev_base_lock); + } + if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { char ifname[IFNAMSIZ];