Linux Networking and Network Devices APIs¶
Linux Networking¶
Networking Base Types¶
- enumsock_type¶
Socket types
Constants
SOCK_STREAM: stream (connection) socket
SOCK_DGRAM: datagram (conn.less) socket
SOCK_RAW: raw socket
SOCK_RDM: reliably-delivered message
SOCK_SEQPACKET: sequential packet socket
SOCK_DCCP: Datagram Congestion Control Protocol socket
SOCK_PACKET: linux specific way of getting packets at the dev level. For writing rarp and other similar things on the user level.
Description
When adding some new socket type please grep ARCH_HAS_SOCKET_TYPE include/asm-*/socket.h, at least MIPS overrides this enum for binary compat reasons.
- enumsock_shutdown_cmd¶
Shutdown types
Constants
SHUT_RDshutdown receptions
SHUT_WRshutdown transmissions
SHUT_RDWRshutdown receptions/transmissions
- structsocket¶
general BSD socket
Definition:
struct socket { socket_state state; short type; unsigned long flags; struct file *file; struct sock *sk; const struct proto_ops *ops; struct socket_wq wq;};Members
state: socket state (SS_CONNECTED, etc)
type: socket type (SOCK_STREAM, etc)
flags: socket flags (SOCK_NOSPACE, etc)
file: File back pointer for gc
sk: internal networking protocol agnostic socket representation
ops: protocol specific socket operations
wq: wait queue for several uses
Socket Buffer Functions¶
- unsignedintskb_frag_size(constskb_frag_t*frag)¶
Returns the size of a skb fragment
Parameters
constskb_frag_t*fragskb fragment
- voidskb_frag_size_set(skb_frag_t*frag,unsignedintsize)¶
Sets the size of a skb fragment
Parameters
skb_frag_t*fragskb fragment
unsignedintsizesize of fragment
- voidskb_frag_size_add(skb_frag_t*frag,intdelta)¶
Increments the size of a skb fragment by delta
Parameters
skb_frag_t*fragskb fragment
intdeltavalue to add
- voidskb_frag_size_sub(skb_frag_t*frag,intdelta)¶
Decrements the size of a skb fragment by delta
Parameters
skb_frag_t*fragskb fragment
intdeltavalue to subtract
- boolskb_frag_must_loop(structpage*p)¶
Test if p is a high memory page
Parameters
structpage*pfragment’s page
- skb_frag_foreach_page¶
skb_frag_foreach_page(f,f_off,f_len,p,p_off,p_len,copied)
loop over pages in a fragment
Parameters
f: skb frag to operate on
f_off: offset from start of f->netmem
f_len: length from f_off to loop over
p: (temp var) current page
p_off: (temp var) offset from start of current page, non-zero only on first page.
p_len: (temp var) length in current page, < PAGE_SIZE only on first and last page.
copied: (temp var) length so far, excluding current p_len.
Description
A fragment can hold a compound page, in which case per-page operations, notably kmap_atomic, must be called for each regular page.
- structskb_shared_hwtstamps¶
hardware time stamps
Definition:
struct skb_shared_hwtstamps { union { ktime_t hwtstamp; void *netdev_data; };};Members
{unnamed_union}: anonymous
hwtstamp: hardware time stamp transformed into duration since arbitrary point in time
netdev_data: address/cookie of network device driver used as reference to actual hardware time stamp
Description
Software time stamps generated by ktime_get_real() are stored in skb->tstamp.
hwtstamps can only be compared against other hwtstamps from the same device.
This structure is attached to packets as part of the skb_shared_info. Use skb_hwtstamps() to get a pointer.
- structsk_buff¶
socket buffer
Definition:
struct sk_buff { union { struct { struct sk_buff *next; struct sk_buff *prev; union { struct net_device *dev; unsigned long dev_scratch; }; }; struct rb_node rbnode; struct list_head list; struct llist_node ll_node; }; struct sock *sk; union { ktime_t tstamp; u64 skb_mstamp_ns; }; char cb[48] ; union { struct { unsigned long _skb_refdst; void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor;#ifdef CONFIG_NET_SOCK_MSG; unsigned long _sk_redir;#endif; };#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE); unsigned long _nfct;#endif; unsigned int len, data_len; __u16 mac_len, hdr_len; __u16 queue_mapping;#ifdef __BIG_ENDIAN_BITFIELD;#define CLONED_MASK (1 << 7);#else;#define CLONED_MASK 1;#endif;#define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset); __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, pfmemalloc:1, pp_recycle:1;#ifdef CONFIG_SKB_EXTENSIONS; __u8 active_extensions;#endif; __u8 pkt_type:3; __u8 ignore_df:1; __u8 dst_pending_confirm:1; __u8 ip_summed:2; __u8 ooo_okay:1; __u8 tstamp_type:2;#ifdef CONFIG_NET_XGRESS; __u8 tc_at_ingress:1; __u8 tc_skip_classify:1;#endif; __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 inner_protocol_type:1; __u8 l4_hash:1; __u8 sw_hash:1;#ifdef CONFIG_WIRELESS; __u8 wifi_acked_valid:1; __u8 wifi_acked:1;#endif; __u8 no_fcs:1; __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1;#ifdef CONFIG_IPV6_NDISC_NODETYPE; __u8 ndisc_nodetype:2;#endif;#if IS_ENABLED(CONFIG_IP_VS); __u8 ipvs_property:1;#endif;#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES); __u8 nf_trace:1;#endif;#ifdef CONFIG_NET_SWITCHDEV; __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1;#endif; __u8 redirected:1;#ifdef CONFIG_NET_REDIRECT; __u8 from_ingress:1;#endif;#ifdef CONFIG_NETFILTER_SKIP_EGRESS; __u8 nf_skip_egress:1;#endif;#ifdef CONFIG_SKB_DECRYPTED; __u8 decrypted:1;#endif; __u8 slow_gro:1;#if IS_ENABLED(CONFIG_IP_SCTP); __u8 
csum_not_inet:1;#endif; __u8 unreadable:1;#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS); __u16 tc_index;#endif; u16 alloc_cpu; union { __wsum csum; struct { __u16 csum_start; __u16 csum_offset; }; }; __u32 priority; int skb_iif; __u32 hash; union { u32 vlan_all; struct { __be16 vlan_proto; __u16 vlan_tci; }; };#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS); union { unsigned int napi_id; unsigned int sender_cpu; };#endif;#ifdef CONFIG_NETWORK_SECMARK; __u32 secmark;#endif; union { __u32 mark; __u32 reserved_tailroom; }; union { __be16 inner_protocol; __u8 inner_ipproto; }; __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; __be16 protocol; __u16 transport_header; __u16 network_header; __u16 mac_header;#ifdef CONFIG_KCOV; u64 kcov_handle;#endif; sk_buff_data_t tail; sk_buff_data_t end; unsigned char *head, *data; unsigned int truesize; refcount_t users;#ifdef CONFIG_SKB_EXTENSIONS; struct skb_ext *extensions;#endif;};Members
{unnamed_union}anonymous
{unnamed_struct}anonymous
nextNext buffer in list
prevPrevious buffer in list
{unnamed_union}anonymous
devDevice we arrived on/are leaving by
dev_scratch: (aka dev) alternate use of dev when dev would be NULL
rbnode: RB tree node, alternative to next/prev for netem/tcp
listqueue head
ll_nodeanchor in an llist (eg socket defer_list)
skSocket we are owned by
{unnamed_union}anonymous
tstampTime we arrived/left
skb_mstamp_ns: (aka tstamp) earliest departure time; start point for retransmit timer
cbControl buffer. Free for use by every layer. Put private vars here
{unnamed_union}anonymous
{unnamed_struct}anonymous
_skb_refdstdestination entry (with norefcount bit)
destructorDestruct function
tcp_tsorted_anchorlist structure for TCP (tp->tsorted_sent_queue)
_sk_redirsocket redirection information for skmsg
_nfctAssociated connection, if any (with nfctinfo bits)
lenLength of actual data
data_lenData length
mac_lenLength of link layer header
hdr_lenwritable header length of cloned skb
queue_mappingQueue mapping for multiqueue devices
clonedHead may be cloned (check refcnt to be sure)
nohdrPayload reference only, must not modify header
fcloneskbuff clone status
peeked: this packet has been seen already, so stats have been done for it, don’t do them again
head_frag: skb was allocated from page fragments, not allocated by kmalloc() or vmalloc().
pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
pp_recycle: mark the packet for recycling instead of freeing (implies page_pool support on driver)
active_extensionsactive extensions (skb_ext_id types)
pkt_typePacket class
ignore_dfallow local fragmentation
dst_pending_confirmneed to confirm neighbour
ip_summedDriver fed us an IP checksum
ooo_okayallow the mapping of a socket to a queue to be changed
tstamp_type: When set, skb->tstamp has the delivery_time clock base of skb->tstamp.
tc_at_ingressused within tc_classify to distinguish in/egress
tc_skip_classifydo not classify packet. set by IFB device
remcsum_offloadremote checksum offload is enabled
csum_complete_swchecksum was completed by software
csum_level: indicates the number of consecutive checksums found in the packet minus one that have been verified as CHECKSUM_UNNECESSARY (max 3)
inner_protocol_type: whether the inner protocol is ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
l4_hash: indicate hash is a canonical 4-tuple hash over transport ports.
sw_hashindicates hash was computed in software stack
wifi_acked_validwifi_acked was set
wifi_ackedwhether frame was acked on wifi or not
no_fcsRequest NIC to treat last 4 bytes as Ethernet FCS
encapsulationindicates the inner headers in the skbuff are valid
encap_hdr_csumsoftware checksum is needed
csum_validchecksum is already valid
ndisc_nodetyperouter type (from link layer)
ipvs_propertyskbuff is owned by ipvs
nf_tracenetfilter packet trace flag
offload_fwd_markPacket was L2-forwarded in hardware
offload_l3_fwd_markPacket was L3-forwarded in hardware
redirectedpacket was redirected by packet classifier
from_ingresspacket was redirected from the ingress path
nf_skip_egresspacket shall skip nf egress - see netfilter_netdev.h
decryptedDecrypted SKB
slow_grostate present at GRO time, slower prepare step required
csum_not_inetuse CRC32c to resolve CHECKSUM_PARTIAL
unreadable: indicates that at least 1 of the fragments in this skb is unreadable.
tc_indexTraffic control index
alloc_cpuCPU which did the skb allocation.
{unnamed_union}anonymous
csumChecksum (must include start/offset pair)
{unnamed_struct}anonymous
csum_startOffset from skb->head where checksumming should start
csum_offsetOffset from csum_start where checksum should be stored
priorityPacket queueing priority
skb_iififindex of device we arrived on
hashthe packet hash
{unnamed_union}anonymous
vlan_allvlan fields (proto & tci)
{unnamed_struct}anonymous
vlan_protovlan encapsulation protocol
vlan_tcivlan tag control information
{unnamed_union}anonymous
napi_id: id of the NAPI struct this skb came from
sender_cpu: (aka napi_id) source CPU in XPS
secmarksecurity marking
{unnamed_union}anonymous
markGeneric packet mark
reserved_tailroom: (aka mark) number of bytes of free space available at the tail of an sk_buff
{unnamed_union}anonymous
inner_protocolProtocol (encapsulation)
inner_ipproto: (aka inner_protocol) stores ipproto when skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
inner_transport_headerInner transport layer header (encapsulation)
inner_network_headerNetwork layer header (encapsulation)
inner_mac_headerLink layer header (encapsulation)
protocolPacket protocol from driver
transport_headerTransport layer header
network_headerNetwork layer header
mac_headerLink layer header
kcov_handleKCOV remote handle for remote coverage collection
tailTail pointer
endEnd pointer
headHead of buffer
dataData head pointer
truesizeBuffer size
usersUser count - see {datagram,tcp}.c
extensionsallocated extensions, valid if active_extensions is nonzero
Parameters
conststructsk_buff*skbbuffer
Parameters
conststructsk_buff*skbbuffer
Return
skb dst_entry, regardless of reference taken or not.
Parameters
structsk_buff*skbbuffer
Description
Resets skb dst_entry without adjusting its reference count. Useful in cases where dst_entry needs to be temporarily reset and restored. Note that the returned value cannot be used directly because it might contain SKB_DST_NOREF bit.
When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly handle dst_entry reference counting.
Return
original skb dst_entry.
- voidskb_dstref_restore(structsk_buff*skb,unsignedlongrefdst)¶
restore skb dst_entry removed via skb_dstref_steal()
Parameters
struct sk_buff *skb: buffer
unsigned long refdst: dst entry from a call to skb_dstref_steal()
Parameters
structsk_buff*skbbuffer
structdst_entry*dstdst entry
Description
Sets skb dst, assuming a reference was taken on dst and should be released by skb_dst_drop()
- voidskb_dst_set_noref(structsk_buff*skb,structdst_entry*dst)¶
sets skb dst, hopefully, without taking reference
Parameters
structsk_buff*skbbuffer
structdst_entry*dstdst entry
Description
Sets skb dst, assuming a reference was not taken on dst. If dst entry is cached, we do not take reference and dst_release will be avoided by refdst_drop. If dst entry is not cached, we take reference, so that last dst_release can destroy the dst immediately.
Parameters
conststructsk_buff*skbbuffer
Parameters
conststructsk_buff*skbbuffer
Parameters
structsk_buff*skbbuffer
Return
true if we can free the skb.
Parameters
structsk_buff*skbbuffer to free
Parameters
unsignedintsizesize to allocate
gfp_tpriorityallocation mask
Description
This function is a convenient wrapper around __alloc_skb().
Parameters
conststructsock*sksocket
conststructsk_buff*skbbuffer
Return
true if skb is a fast clone, and its clone is not freed. Some drivers call skb_orphan() in their ndo_start_xmit(), so we also check that didn’t happen.
- structsk_buff*alloc_skb_fclone(unsignedintsize,gfp_tpriority)¶
allocate a network buffer from fclone cache
Parameters
unsignedintsizesize to allocate
gfp_tpriorityallocation mask
Description
This function is a convenient wrapper around __alloc_skb().
Parameters
structsk_buff*skbbuffer to pad
intpadspace to pad
Description
Ensure that a buffer is followed by a padding area that is zero filled. Used by network drivers which may DMA or transfer data beyond the buffer end onto the wire.
May return error in out of memory cases. The skb is freed on error.
- intskb_queue_empty(conststructsk_buff_head*list)¶
check if a queue is empty
Parameters
conststructsk_buff_head*listqueue head
Description
Returns true if the queue is empty, false otherwise.
- boolskb_queue_empty_lockless(conststructsk_buff_head*list)¶
check if a queue is empty
Parameters
conststructsk_buff_head*listqueue head
Description
Returns true if the queue is empty, false otherwise. This variant can be used in lockless contexts.
- boolskb_queue_is_last(conststructsk_buff_head*list,conststructsk_buff*skb)¶
check if skb is the last entry in the queue
Parameters
conststructsk_buff_head*listqueue head
conststructsk_buff*skbbuffer
Description
Returns true if skb is the last buffer on the list.
- boolskb_queue_is_first(conststructsk_buff_head*list,conststructsk_buff*skb)¶
check if skb is the first entry in the queue
Parameters
conststructsk_buff_head*listqueue head
conststructsk_buff*skbbuffer
Description
Returns true if skb is the first buffer on the list.
- structsk_buff*skb_queue_next(conststructsk_buff_head*list,conststructsk_buff*skb)¶
return the next packet in the queue
Parameters
conststructsk_buff_head*listqueue head
conststructsk_buff*skbcurrent buffer
Description
Return the next packet in list after skb. It is only valid to call this if skb_queue_is_last() evaluates to false.
- structsk_buff*skb_queue_prev(conststructsk_buff_head*list,conststructsk_buff*skb)¶
return the prev packet in the queue
Parameters
conststructsk_buff_head*listqueue head
conststructsk_buff*skbcurrent buffer
Description
Return the prev packet in list before skb. It is only valid to call this if skb_queue_is_first() evaluates to false.
Parameters
structsk_buff*skbbuffer to reference
Description
Makes another reference to a socket buffer and returns a pointer to the buffer.
Parameters
conststructsk_buff*skbbuffer to check
Description
Returns true if the buffer was generated with skb_clone() and is one of multiple shared copies of the buffer. Cloned buffers are shared data so must not be written to under normal circumstances.
Parameters
conststructsk_buff*skbbuffer to check
Description
Returns true if modifying the header part of the buffer requires the data to be copied.
Parameters
structsk_buff*skbbuffer to operate on
Description
See “DOC: dataref and headerless skbs”.
Parameters
conststructsk_buff*skbbuffer to check
Description
Returns true if more than one person has a reference to this buffer.
- structsk_buff*skb_share_check(structsk_buff*skb,gfp_tpri)¶
check if buffer is shared and if so clone it
Parameters
structsk_buff*skbbuffer to check
gfp_tpripriority for memory allocation
Description
If the buffer is shared the buffer is cloned and the old copy drops a reference. A new clone with a single reference is returned. If the buffer is not shared the original buffer is returned. When being called from interrupt status or with spinlocks held pri must be GFP_ATOMIC.
NULL is returned on a memory allocation failure.
Parameters
structsk_buff*skbbuffer to check
gfp_tpripriority for memory allocation
Description
If the socket buffer is a clone then this function creates a new copy of the data, drops a reference count on the old copy and returns the new copy with the reference count at 1. If the buffer is not a clone the original buffer is returned. When called with a spinlock held or from interrupt state pri must be GFP_ATOMIC.
NULL is returned on a memory allocation failure.
Parameters
conststructsk_buff_head*list_list to peek at
Description
Peek an sk_buff. Unlike most other operations you _MUST_ be careful with this one. A peek leaves the buffer on the list and someone else may run off with it. You must hold the appropriate locks or have a private queue to do this.
Returns NULL for an empty list or a pointer to the head element. The reference count is not incremented and the reference is therefore volatile. Use with caution.
Parameters
conststructsk_buff_head*list_list to peek at
Description
Like skb_peek(), but the caller knows that the list is not empty.
- structsk_buff*skb_peek_next(structsk_buff*skb,conststructsk_buff_head*list_)¶
peek skb following the given one from a queue
Parameters
structsk_buff*skbskb to start from
conststructsk_buff_head*list_list to peek at
Description
Returns NULL when the end of the list is met or a pointer to the next element. The reference count is not incremented and the reference is therefore volatile. Use with caution.
Parameters
conststructsk_buff_head*list_list to peek at
Description
Peek an sk_buff. Unlike most other operations you _MUST_ be careful with this one. A peek leaves the buffer on the list and someone else may run off with it. You must hold the appropriate locks or have a private queue to do this.
Returns NULL for an empty list or a pointer to the tail element. The reference count is not incremented and the reference is therefore volatile. Use with caution.
- __u32skb_queue_len(conststructsk_buff_head*list_)¶
get queue length
Parameters
conststructsk_buff_head*list_list to measure
Description
Return the length of an sk_buff queue.
- __u32skb_queue_len_lockless(conststructsk_buff_head*list_)¶
get queue length
Parameters
conststructsk_buff_head*list_list to measure
Description
Return the length of an sk_buff queue. This variant can be used in lockless contexts.
- void__skb_queue_head_init(structsk_buff_head*list)¶
initialize non-spinlock portions of sk_buff_head
Parameters
structsk_buff_head*listqueue to initialize
Description
This initializes only the list and queue length aspects of an sk_buff_head object. This allows to initialize the list aspects of an sk_buff_head without reinitializing things like the spinlock. It can also be used for on-stack sk_buff_head objects where the spinlock is known to not be used.
- voidskb_queue_splice(conststructsk_buff_head*list,structsk_buff_head*head)¶
join two skb lists, this is designed for stacks
Parameters
conststructsk_buff_head*listthe new list to add
structsk_buff_head*headthe place to add it in the first list
- voidskb_queue_splice_init(structsk_buff_head*list,structsk_buff_head*head)¶
join two skb lists and reinitialise the emptied list
Parameters
structsk_buff_head*listthe new list to add
structsk_buff_head*headthe place to add it in the first list
Description
The list at list is reinitialised
- voidskb_queue_splice_tail(conststructsk_buff_head*list,structsk_buff_head*head)¶
join two skb lists, each list being a queue
Parameters
conststructsk_buff_head*listthe new list to add
structsk_buff_head*headthe place to add it in the first list
- voidskb_queue_splice_tail_init(structsk_buff_head*list,structsk_buff_head*head)¶
join two skb lists and reinitialise the emptied list
Parameters
structsk_buff_head*listthe new list to add
structsk_buff_head*headthe place to add it in the first list
Description
Each of the lists is a queue. The list at list is reinitialised
- void__skb_queue_after(structsk_buff_head*list,structsk_buff*prev,structsk_buff*newsk)¶
queue a buffer at the list head
Parameters
structsk_buff_head*listlist to use
structsk_buff*prevplace after this buffer
structsk_buff*newskbuffer to queue
Description
Queue a buffer in the middle of a list. This function takes no locks and you must therefore hold required locks before calling it.
A buffer cannot be placed on two lists at the same time.
Parameters
structsk_buff_head*listlist to use
structsk_buff*newskbuffer to queue
Description
Queue a buffer at the start of a list. This function takes no locks and you must therefore hold required locks before calling it.
A buffer cannot be placed on two lists at the same time.
Parameters
structsk_buff_head*listlist to use
structsk_buff*newskbuffer to queue
Description
Queue a buffer at the end of a list. This function takes no locks and you must therefore hold required locks before calling it.
A buffer cannot be placed on two lists at the same time.
Parameters
structsk_buff_head*listlist to dequeue from
Description
Remove the head of the list. This function does not take any locks so must be used with appropriate locks held only. The head item is returned or NULL if the list is empty.
Parameters
structsk_buff_head*listlist to dequeue from
Description
Remove the tail of the list. This function does not take any locks so must be used with appropriate locks held only. The tail item is returned or NULL if the list is empty.
Parameters
structsk_buff*skbbuffer to add len to
intdeltanumber of bytes to add
- void__skb_fill_netmem_desc(structsk_buff*skb,inti,netmem_refnetmem,intoff,intsize)¶
initialise a fragment in an skb
Parameters
structsk_buff*skbbuffer containing fragment to be initialised
intifragment index to initialise
netmem_refnetmemthe netmem to use for this fragment
intoffthe offset to the data withpage
intsizethe length of the data
Description
Initialises the i’th fragment of skb to point to size bytes at offset off within page.
Does not take any additional reference on the fragment.
- voidskb_fill_page_desc(structsk_buff*skb,inti,structpage*page,intoff,intsize)¶
initialise a paged fragment in an skb
Parameters
structsk_buff*skbbuffer containing fragment to be initialised
intipaged fragment index to initialise
structpage*pagethe page to use for this fragment
intoffthe offset to the data withpage
intsizethe length of the data
Description
As per __skb_fill_page_desc() -- initialises the i’th fragment of skb to point to size bytes at offset off within page. In addition updates skb such that i is the last fragment.
Does not take any additional reference on the fragment.
- voidskb_fill_page_desc_noacc(structsk_buff*skb,inti,structpage*page,intoff,intsize)¶
initialise a paged fragment in an skb
Parameters
structsk_buff*skbbuffer containing fragment to be initialised
intipaged fragment index to initialise
structpage*pagethe page to use for this fragment
intoffthe offset to the data withpage
intsizethe length of the data
Description
Variant of skb_fill_page_desc() which does not deal with pfmemalloc, if page is not owned by us.
Parameters
conststructsk_buff*skbbuffer to check
Description
Return the number of bytes of free space at the head of an sk_buff.
Parameters
conststructsk_buff*skbbuffer to check
Description
Return the number of bytes of free space at the tail of an sk_buff
Parameters
conststructsk_buff*skbbuffer to check
Description
Return the number of bytes of free space at the tail of an sk_buff allocated by sk_stream_alloc()
Parameters
structsk_buff*skbbuffer to alter
intlenbytes to move
Description
Increase the headroom of an empty sk_buff by reducing the tailroom. This is only allowed for an empty buffer.
- voidskb_tailroom_reserve(structsk_buff*skb,unsignedintmtu,unsignedintneeded_tailroom)¶
adjust reserved_tailroom
Parameters
structsk_buff*skbbuffer to alter
unsignedintmtumaximum amount of headlen permitted
unsignedintneeded_tailroomminimum amount of reserved_tailroom
Description
Set reserved_tailroom so that headlen can be as large as possible but not larger than mtu and tailroom cannot be smaller than needed_tailroom. The required headroom should already have been reserved before using this function.
Parameters
structsk_buff*skbbuffer to alter
Description
Hardened version of skb_reset_transport_header().
Return
true if the operation was a success.
- voidpskb_trim_unique(structsk_buff*skb,unsignedintlen)¶
remove end from a paged unique (not cloned) buffer
Parameters
structsk_buff*skbbuffer to alter
unsignedintlennew length
Description
This is identical to pskb_trim except that the caller knows that the skb is not cloned so we should never get an error due to out-of-memory.
Parameters
structsk_buff*skbbuffer to orphan
Description
If a buffer currently has an owner then we call the owner’s destructor function and make the skb unowned. The buffer continues to exist but is no longer charged to its former owner.
Parameters
structsk_buff*skbbuffer to orphan frags from
gfp_tgfp_maskallocation mask for replacement pages
Description
For each frag in the SKB which needs a destructor (i.e. has an owner) create a copy of that frag and release the original page by calling the destructor.
- void__skb_queue_purge_reason(structsk_buff_head*list,enumskb_drop_reasonreason)¶
empty a list
Parameters
structsk_buff_head*listlist to empty
enumskb_drop_reasonreasondrop reason
Description
Delete all buffers on an sk_buff list. Each buffer is removed from the list and one reference dropped. This function does not take the list lock and the caller must hold the relevant locks to use it.
- void*netdev_alloc_frag(unsignedintfragsz)¶
allocate a page fragment
Parameters
unsignedintfragszfragment size
Description
Allocates a frag from a page for receive buffer. Uses GFP_ATOMIC allocations.
- structsk_buff*netdev_alloc_skb(structnet_device*dev,unsignedintlength)¶
allocate an skbuff for rx on a specific device
Parameters
structnet_device*devnetwork device to receive on
unsignedintlengthlength to allocate
Description
Allocate a new sk_buff and assign it a usage count of one. The buffer has unspecified headroom built in. Users should allocate the headroom they think they need without accounting for the built in space. The built in space is used for optimisations.
NULL is returned if there is no free memory. Although this function allocates memory it can be called from an interrupt.
- structpage*__dev_alloc_pages(gfp_tgfp_mask,unsignedintorder)¶
allocate page for network Rx
Parameters
gfp_tgfp_maskallocation priority. Set __GFP_NOMEMALLOC if not for network Rx
unsignedintordersize of the allocation
Description
Allocate a new page.
NULL is returned if there is no free memory.
- structpage*__dev_alloc_page(gfp_tgfp_mask)¶
allocate a page for network Rx
Parameters
gfp_tgfp_maskallocation priority. Set __GFP_NOMEMALLOC if not for network Rx
Description
Allocate a new page.
NULL is returned if there is no free memory.
Parameters
conststructpage*pagethe page to test
Description
A page shouldn’t be considered for reusing/recycling if it was allocated under memory pressure or at a distant memory node.
Return
false if this page should be returned to page allocator, true otherwise.
- voidskb_propagate_pfmemalloc(conststructpage*page,structsk_buff*skb)¶
Propagate pfmemalloc if skb is allocated after RX page
Parameters
conststructpage*pageThe page that was allocated from skb_alloc_page
structsk_buff*skbThe skb that may need pfmemalloc set
- unsignedintskb_frag_off(constskb_frag_t*frag)¶
Returns the offset of a skb fragment
Parameters
constskb_frag_t*fragthe paged fragment
- voidskb_frag_off_add(skb_frag_t*frag,intdelta)¶
Increments the offset of a skb fragment by delta
Parameters
skb_frag_t*fragskb fragment
intdeltavalue to add
- voidskb_frag_off_set(skb_frag_t*frag,unsignedintoffset)¶
Sets the offset of a skb fragment
Parameters
skb_frag_t*fragskb fragment
unsignedintoffsetoffset of fragment
- voidskb_frag_off_copy(skb_frag_t*fragto,constskb_frag_t*fragfrom)¶
Sets the offset of a skb fragment from another fragment
Parameters
skb_frag_t*fragtoskb fragment where offset is set
constskb_frag_t*fragfromskb fragment offset is copied from
- structnet_iov*skb_frag_net_iov(constskb_frag_t*frag)¶
retrieve the net_iov referred to by fragment
Parameters
constskb_frag_t*fragthe fragment
Return
the struct net_iov associated with frag. Returns NULL if this frag has no associated net_iov.
- structpage*skb_frag_page(constskb_frag_t*frag)¶
retrieve the page referred to by a paged fragment
Parameters
constskb_frag_t*fragthe paged fragment
Return
the struct page associated with frag. Returns NULL if this frag has no associated page.
- netmem_refskb_frag_netmem(constskb_frag_t*frag)¶
retrieve the netmem referred to by a fragment
Parameters
constskb_frag_t*fragthe fragment
Return
the netmem_ref associated with frag.
- void*skb_frag_address(constskb_frag_t*frag)¶
gets the address of the data contained in a paged fragment
Parameters
constskb_frag_t*fragthe paged fragment buffer
Return
the address of the data within frag. The page must already be mapped.
- void*skb_frag_address_safe(constskb_frag_t*frag)¶
gets the address of the data contained in a paged fragment
Parameters
constskb_frag_t*fragthe paged fragment buffer
Return
the address of the data within frag. Checks that the page is mapped and returns NULL otherwise.
- voidskb_frag_page_copy(skb_frag_t*fragto,constskb_frag_t*fragfrom)¶
sets the page in a fragment from another fragment
Parameters
skb_frag_t*fragtoskb fragment where page is set
constskb_frag_t*fragfromskb fragment page is copied from
- dma_addr_t__skb_frag_dma_map(structdevice*dev,constskb_frag_t*frag,size_toffset,size_tsize,enumdma_data_directiondir)¶
maps a paged fragment via the DMA API
Parameters
struct device *dev: the device to map the fragment to
const skb_frag_t *frag: the paged fragment to map
size_t offset: the offset within the fragment (starting at the fragment’s own offset)
size_t size: the number of bytes to map
enum dma_data_direction dir: the direction of the mapping (PCI_DMA_*)
Description
Maps the page associated with frag to device.
Parameters
conststructsk_buff*skbbuffer to check
unsignedintlenlength up to which to write
Description
Returns true if modifying the header part of the cloned buffer does not require the data to be copied.
Parameters
structsk_buff*skbbuffer to cow
unsignedintheadroomneeded headroom
Description
If the skb passed lacks sufficient headroom or its data part is shared, data is reallocated. If reallocation fails, an error is returned and original skb is not changed.
The result is skb with writable area skb->head...skb->tail and at least headroom of space at head.
Parameters
structsk_buff*skbbuffer to cow
unsignedintheadroomneeded headroom
Description
This function is identical to skb_cow except that we replace the skb_cloned check by skb_header_cloned. It should be used when you only need to push on some header and do not need to modify the data.
Parameters
structsk_buff*skbbuffer to pad
unsignedintlenminimal length
Description
Pads up a buffer to ensure the trailing bytes exist and are blanked. If the buffer already contains sufficient data it is untouched. Otherwise it is extended. Returns zero on success. The skb is freed on error.
- int__skb_put_padto(structsk_buff*skb,unsignedintlen,boolfree_on_error)¶
increase size and pad an skbuff up to a minimal size
Parameters
structsk_buff*skbbuffer to pad
unsignedintlenminimal length
boolfree_on_errorfree buffer on error
Description
Pads up a buffer to ensure the trailing bytes exist and are blanked. If the buffer already contains sufficient data it is untouched. Otherwise it is extended. Returns zero on success. The skb is freed on error if free_on_error is true.
- intskb_put_padto(structsk_buff*skb,unsignedintlen)¶
increase size and pad an skbuff up to a minimal size
Parameters
structsk_buff*skbbuffer to pad
unsignedintlenminimal length
Description
Pads up a buffer to ensure the trailing bytes exist and areblanked. If the buffer already contains sufficient data itis untouched. Otherwise it is extended. Returns zero onsuccess. The skb is freed on error.
Parameters
structsk_buff*skbbuffer to linearize
Description
If there is no free memory -ENOMEM is returned, otherwise zerois returned and the old skb data released.
Parameters
conststructsk_buff*skbbuffer to test
Return
true if the skb has at least one frag that might be modified by an external entity (as in vmsplice()/sendfile())
Parameters
structsk_buff*skbbuffer to process
Description
If there is no free memory -ENOMEM is returned, otherwise zerois returned and the old skb data released.
- voidskb_postpull_rcsum(structsk_buff*skb,constvoid*start,unsignedintlen)¶
update checksum for received skb after pull
Parameters
structsk_buff*skbbuffer to update
constvoid*startstart of data before pull
unsignedintlenlength of data pulled
Description
After doing a pull on a received packet, you need to call this toupdate the CHECKSUM_COMPLETE checksum, or set ip_summed toCHECKSUM_NONE so that it can be recomputed from scratch.
- voidskb_postpush_rcsum(structsk_buff*skb,constvoid*start,unsignedintlen)¶
update checksum for received skb after push
Parameters
structsk_buff*skbbuffer to update
constvoid*startstart of data after push
unsignedintlenlength of data pushed
Description
After doing a push on a received packet, you need to call this toupdate the CHECKSUM_COMPLETE checksum.
Parameters
structsk_buff*skbbuffer to update
unsignedintlenlength of data pushed
Description
This function performs an skb_push on the packet and updatesthe CHECKSUM_COMPLETE checksum. It should be used onreceive path processing instead of skb_push unless you knowthat the checksum difference is zero (e.g., a valid IP header)or you are setting ip_summed to CHECKSUM_NONE.
Parameters
structsk_buff*skbbuffer to trim
unsignedintlennew length
Description
This is exactly the same as pskb_trim except that it ensures the checksum of received packets is still valid after the operation. It can change skb pointers.
- boolskb_needs_linearize(structsk_buff*skb,netdev_features_tfeatures)¶
check if we need to linearize a given skb depending on the given device features.
Parameters
structsk_buff*skbsocket buffer to check
netdev_features_tfeaturesnet device features
Description
Returns true if either: 1. skb has frag_list and the device doesn’t support FRAGLIST, or 2. skb is fragmented and the device does not support SG.
- voidskb_get_timestamp(conststructsk_buff*skb,struct__kernel_old_timeval*stamp)¶
get timestamp from a skb
Parameters
conststructsk_buff*skbskb to get stamp from
struct__kernel_old_timeval*stamppointer to
struct__kernel_old_timevalto store stamp in
Description
Timestamps are stored in the skb as offsets to a base timestamp. This function converts the offset back to a struct timeval and stores it in stamp.
- voidskb_data_move(structsk_buff*skb,constintlen,constunsignedintn)¶
Move packet data and metadata after
skb_push()orskb_pull().
Parameters
structsk_buff*skbpacket to operate on
constintlennumber of bytes pushed or pulled from
sk_buff->dataconstunsignedintnnumber of bytes to
memmove()from pre-push/pullsk_buff->data
Description
Movesn bytes of packet data, can be zero, and all bytes of skb metadata.
Assumes metadata is located immediately before sk_buff->data prior to the push/pull, and that sufficient headroom exists to hold it after an skb_push(). Otherwise, metadata is cleared and a one-time warning is issued.
Prefer skb_postpull_data_move() or skb_postpush_data_move() to calling this helper directly.
- voidskb_postpull_data_move(structsk_buff*skb,constunsignedintlen,constunsignedintn)¶
Move packet data and metadata after
skb_pull().
Parameters
structsk_buff*skbpacket to operate on
constunsignedintlennumber of bytes pulled from
sk_buff->dataconstunsignedintnnumber of bytes to
memmove()from pre-pullsk_buff->data
Description
Seeskb_data_move() for details.
- voidskb_postpush_data_move(structsk_buff*skb,constunsignedintlen,constunsignedintn)¶
Move packet data and metadata after
skb_push().
Parameters
structsk_buff*skbpacket to operate on
constunsignedintlennumber of bytes pushed onto
sk_buff->dataconstunsignedintnnumber of bytes to
memmove()from pre-pushsk_buff->data
Description
Seeskb_data_move() for details.
- voidskb_complete_tx_timestamp(structsk_buff*skb,structskb_shared_hwtstamps*hwtstamps)¶
deliver cloned skb with tx timestamps
Parameters
structsk_buff*skbclone of the original outgoing packet
structskb_shared_hwtstamps*hwtstampshardware time stamps
Description
PHY drivers may accept clones of transmitted packets fortimestamping via their phy_driver.txtstamp method. These driversmust call this function to return the skb back to the stack with atimestamp.
- voidskb_tstamp_tx(structsk_buff*orig_skb,structskb_shared_hwtstamps*hwtstamps)¶
queue clone of skb with send time stamps
Parameters
structsk_buff*orig_skbthe original outgoing packet
structskb_shared_hwtstamps*hwtstampshardware time stamps, may be NULL if not available
Description
If the skb has a socket associated, then this function clones theskb (thus sharing the actual data and optional structures), storesthe optional hardware time stamping information (if non NULL) orgenerates a software time stamp (otherwise), then queues the cloneto the error queue of the socket. Errors are silently ignored.
Parameters
structsk_buff*skbA socket buffer.
Description
Ethernet MAC Drivers should call this function in their hard_xmit() function immediately before giving the sk_buff to the MAC hardware.
Specifically, one should make absolutely sure that this function is called before TX completion of this packet can trigger. Otherwise the packet could potentially already be freed.
Parameters
structsk_buff*skbthe original outgoing packet
boolackedack status
Parameters
structsk_buff*skbpacket to process
Description
This function calculates the checksum over the entire packet plusthe value of skb->csum. The latter can be used to supply thechecksum of a pseudo header as used by TCP/UDP. It returns thechecksum.
For protocols that contain complete checksums such as ICMP/TCP/UDP,this function can be used to verify that checksum on receivedpackets. In that case the function should return zero if thechecksum is correct. In particular, this function will return zeroif skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that thehardware has already verified the correctness of the checksum.
- structskb_ext¶
sk_buff extensions
Definition:
struct skb_ext { refcount_t refcnt; u8 offset[SKB_EXT_NUM]; u8 chunks; char data[] ;};Members
refcnt1 on allocation, deallocated on 0
offsetoffset to add todata to obtain extension address
chunkssize currently allocated, stored in SKB_EXT_ALIGN_SHIFT units
datastart of extension data, variable sized
Note
- offsets/lengths are stored in chunks of 8 bytes, this allows
to use ‘u8’ types while allowing up to 2kb worth of extension data.
Parameters
conststructsk_buff*skbskb to check
Description
Fresh skbs have their ip_summed set to CHECKSUM_NONE. Instead of forcing ip_summed to CHECKSUM_NONE, we can use this helper, to document places where we make this assertion.
Parameters
conststructsk_buff*skbskb to check
Description
The head on skbs built around a head frag can be removed if they are not cloned. This function returns true if the skb head is locked down due to either being allocated via kmalloc, or by being a clone with multiple references to the head.
- structsock_common¶
minimal network layer representation of sockets
Definition:
struct sock_common { union { __addrpair skc_addrpair; struct { __be32 skc_daddr; __be32 skc_rcv_saddr; }; }; union { unsigned int skc_hash; __u16 skc_u16hashes[2]; }; union { __portpair skc_portpair; struct { __be16 skc_dport; __u16 skc_num; }; }; unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; unsigned char skc_net_refcnt:1; unsigned char skc_bypass_prot_mem:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net;#if IS_ENABLED(CONFIG_IPV6); struct in6_addr skc_v6_daddr; struct in6_addr skc_v6_rcv_saddr;#endif; atomic64_t skc_cookie; union { unsigned long skc_flags; struct sock *skc_listener; struct inet_timewait_death_row *skc_tw_dr; }; union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping;#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING; unsigned short skc_rx_queue_mapping;#endif; union { int skc_incoming_cpu; u32 skc_rcv_wnd; u32 skc_tw_rcv_nxt; }; refcount_t skc_refcnt;};Members
{unnamed_union}anonymous
skc_addrpair8-byte-aligned __u64
unionofskc_daddr &skc_rcv_saddr{unnamed_struct}anonymous
skc_daddrForeign IPv4 addr
skc_rcv_saddrBound local IPv4 addr
{unnamed_union}anonymous
skc_hashhash value used with various protocol lookup tables
skc_u16hashestwo u16 hash values used by UDP lookup tables
{unnamed_union}anonymous
skc_portpair__u32
unionofskc_dport &skc_num{unnamed_struct}anonymous
skc_dportplaceholder for inet_dport/tw_dport
skc_numplaceholder for inet_num/tw_num
skc_familynetwork address family
skc_stateConnection state
skc_reuseSO_REUSEADDRsettingskc_reuseportSO_REUSEPORTsettingskc_ipv6onlysocket is IPV6 only
skc_net_refcntsocket is using net ref counting
skc_bypass_prot_membypass the per-protocol memory accounting for skb
skc_bound_dev_ifbound device index if != 0
{unnamed_union}anonymous
skc_bind_nodebind hash linkage for various protocol lookup tables
skc_portaddr_nodesecond hash linkage for UDP/UDP-Lite protocol
skc_protprotocol handlers inside a network family
skc_netreference to the network namespace of this socket
skc_v6_daddrIPV6 destination address
skc_v6_rcv_saddrIPV6 source address
skc_cookiesocket’s cookie value
{unnamed_union}anonymous
skc_flagsplace holder for sk_flags
SO_LINGER(l_onoff),SO_BROADCAST,SO_KEEPALIVE,SO_OOBINLINEsettings,SO_TIMESTAMPINGsettingsskc_listenerconnection request listener socket (aka rsk_listener)[
unionwithskc_flags]skc_tw_dr(aka tw_dr) ptr to
structinet_timewait_death_row[unionwithskc_flags]{unnamed_union}anonymous
skc_nodemain hash linkage for various protocol lookup tables
skc_nulls_nodemain hash linkage for TCP/UDP/UDP-Lite protocol
skc_tx_queue_mappingtx queue number for this connection
skc_rx_queue_mappingrx queue number for this connection
{unnamed_union}anonymous
skc_incoming_cpurecord/match cpu processing incoming packets
skc_rcv_wnd(aka rsk_rcv_wnd) TCP receive window size (possibly scaled)[
unionwithskc_incoming_cpu]skc_tw_rcv_nxt(aka tw_rcv_nxt) TCP window next expected seq number[
unionwithskc_incoming_cpu]skc_refcntreference count
Description
This is the minimal network layer representation of sockets, the headerfor
structsockandstructinet_timewait_sock.
- structsock¶
network layer representation of sockets
Definition:
struct sock { struct sock_common __sk_common;#define sk_node __sk_common.skc_node;#define sk_nulls_node __sk_common.skc_nulls_node;#define sk_refcnt __sk_common.skc_refcnt;#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping;#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING;#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping;#endif;#define sk_dontcopy_begin __sk_common.skc_dontcopy_begin;#define sk_dontcopy_end __sk_common.skc_dontcopy_end;#define sk_hash __sk_common.skc_hash;#define sk_portpair __sk_common.skc_portpair;#define sk_num __sk_common.skc_num;#define sk_dport __sk_common.skc_dport;#define sk_addrpair __sk_common.skc_addrpair;#define sk_daddr __sk_common.skc_daddr;#define sk_rcv_saddr __sk_common.skc_rcv_saddr;#define sk_family __sk_common.skc_family;#define sk_state __sk_common.skc_state;#define sk_reuse __sk_common.skc_reuse;#define sk_reuseport __sk_common.skc_reuseport;#define sk_ipv6only __sk_common.skc_ipv6only;#define sk_net_refcnt __sk_common.skc_net_refcnt;#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem;#define sk_bound_dev_if __sk_common.skc_bound_dev_if;#define sk_bind_node __sk_common.skc_bind_node;#define sk_prot __sk_common.skc_prot;#define sk_net __sk_common.skc_net;#define sk_v6_daddr __sk_common.skc_v6_daddr;#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr;#define sk_cookie __sk_common.skc_cookie;#define sk_incoming_cpu __sk_common.skc_incoming_cpu;#define sk_flags __sk_common.skc_flags;#define sk_rxhash __sk_common.skc_rxhash; atomic_t sk_drops; __s32 sk_peek_off; struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; struct { atomic_t rmem_alloc; int len; struct sk_buff *head; struct sk_buff *tail; } sk_backlog;#define sk_rmem_alloc sk_backlog.rmem_alloc; struct dst_entry *sk_rx_dst; int sk_rx_dst_ifindex; u32 sk_rx_dst_cookie;#ifdef CONFIG_NET_RX_BUSY_POLL; unsigned int sk_ll_usec; unsigned int sk_napi_id; u16 sk_busy_poll_budget; u8 sk_prefer_busy_poll;#endif; u8 sk_userlocks; int sk_rcvbuf; 
struct sk_filter *sk_filter; union { struct socket_wq *sk_wq; }; void (*sk_data_ready)(struct sock *sk); long sk_rcvtimeo; int sk_rcvlowat; int sk_err; struct socket *sk_socket;#ifdef CONFIG_MEMCG; struct mem_cgroup *sk_memcg;#endif;#ifdef CONFIG_XFRM; struct xfrm_policy *sk_policy[2];#endif;#if IS_ENABLED(CONFIG_INET_PSP); struct psp_assoc *psp_assoc;#endif; socket_lock_t sk_lock; u32 sk_reserved_mem; int sk_forward_alloc; u32 sk_tsflags; int sk_write_pending; atomic_t sk_omem_alloc; int sk_err_soft; int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; union { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; struct page_frag sk_frag; union { struct timer_list sk_timer; struct timer_list tcp_retransmit_timer; struct timer_list mptcp_retransmit_timer; }; unsigned long sk_pacing_rate; atomic_t sk_zckey; atomic_t sk_tskey; unsigned long sk_tx_queue_mapping_jiffies; u32 sk_dst_pending_confirm; u32 sk_pacing_status; unsigned long sk_max_pacing_rate; long sk_sndtimeo; u32 sk_priority; u32 sk_mark; kuid_t sk_uid; u16 sk_protocol; u16 sk_type; struct dst_entry *sk_dst_cache; netdev_features_t sk_route_caps;#ifdef CONFIG_SOCK_VALIDATE_XMIT; struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, struct net_device *dev, struct sk_buff *skb);#endif; u16 sk_gso_type; u16 sk_gso_max_segs; unsigned int sk_gso_max_size; gfp_t sk_allocation; u32 sk_txhash; int sk_sndbuf; u8 sk_pacing_shift; bool sk_use_task_frag; u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1; u8 sk_shutdown; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; u32 sk_ack_backlog; u32 sk_max_ack_backlog; unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; ktime_t sk_stamp;#if BITS_PER_LONG==32; seqlock_t sk_stamp_seq;#endif; int sk_disconnects; union { u8 sk_txrehash; u8 sk_scm_recv_flags; struct { u8 
sk_scm_credentials : 1, sk_scm_security : 1, sk_scm_pidfd : 1, sk_scm_rights : 1, sk_scm_unused : 4; }; }; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6;#define SK_BPF_CB_FLAG_TEST(SK, FLAG) ((SK)->sk_bpf_cb_flags & (FLAG)); u8 sk_bpf_cb_flags; void *sk_user_data;#ifdef CONFIG_SECURITY; void *sk_security;#endif; struct sock_cgroup_data sk_cgrp_data; void (*sk_state_change)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport *sk_reuseport_cb;#ifdef CONFIG_BPF_SYSCALL; struct bpf_local_storage *sk_bpf_storage;#endif; struct numa_drop_counters *sk_drop_counters; struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags;#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES); struct module *sk_owner;#endif;};Members
__sk_commonshared layout with inet_timewait_sock
sk_dropsraw/udp drops counter
sk_peek_offcurrent peek_offset value
sk_error_queuerarely used
sk_receive_queueincoming packets
sk_backlogalways used with the per-socket spinlock held
sk_rx_dstreceive input route used by early demux
sk_rx_dst_ifindexifindex forsk_rx_dst
sk_rx_dst_cookiecookie forsk_rx_dst
sk_ll_usecusecs to busypoll when there is no data
sk_napi_idid of the last napi context to receive data for sk
sk_busy_poll_budgetnapi processing budget when busypolling
sk_prefer_busy_pollprefer busypolling over softirq processing
sk_userlocksSO_SNDBUFandSO_RCVBUFsettingssk_rcvbufsize of receive buffer in bytes
sk_filtersocket filtering instructions
{unnamed_union}anonymous
sk_wqsock wait queue and async head
sk_data_readycallback to indicate there is data to be processed
sk_rcvtimeoSO_RCVTIMEOsettingsk_rcvlowatSO_RCVLOWATsettingsk_errlast error
sk_socketIdentd and reporting IO signals
sk_memcgthis socket’s memory cgroup association
sk_policyflow policy
psp_assocPSP association, if socket is PSP-secured
sk_locksynchronizer
sk_reserved_memspace reserved and non-reclaimable for the socket
sk_forward_allocspace allocated forward
sk_tsflagsSO_TIMESTAMPING flags
sk_write_pendinga write to stream socket waits to start
sk_omem_alloc“o” is “option” or “other”
sk_err_softerrors that don’t cause failure but are the cause of a persistent failure not just ‘timed out’
sk_wmem_queuedpersistent queue size
sk_wmem_alloctransmit queue bytes committed
sk_tsq_flagsTCP Small Queues flags
{unnamed_union}anonymous
sk_send_headfront of stuff to transmit
tcp_rtx_queueTCP re-transmit queue [
unionwithsk_send_head]sk_write_queuePacket sending queue
sk_fragcached page frag
{unnamed_union}anonymous
sk_timersock cleanup timer
tcp_retransmit_timertcp retransmit timer
mptcp_retransmit_timermptcp retransmit timer
sk_pacing_ratePacing rate (if supported by transport/packet scheduler)
sk_zckeycounter to order MSG_ZEROCOPY notifications
sk_tskeycounter to disambiguate concurrent tstamp requests
sk_tx_queue_mapping_jiffiestime in jiffies of lastsk_tx_queue_mapping refresh.
sk_dst_pending_confirmneed to confirm neighbour
sk_pacing_statusPacing status (requested, handled by sch_fq)
sk_max_pacing_rateMaximum pacing rate (
SO_MAX_PACING_RATE)sk_sndtimeoSO_SNDTIMEOsettingsk_prioritySO_PRIORITYsettingsk_markgeneric packet mark
sk_uiduser id of owner
sk_protocolwhich protocol this socket belongs in this network family
sk_typesocket type (
SOCK_STREAM, etc)sk_dst_cachedestination cache
sk_route_capsroute capabilities (e.g.
NETIF_F_TSO)sk_validate_xmit_skbptr to an optional validate function
sk_gso_typeGSO type (e.g.
SKB_GSO_TCPV4)sk_gso_max_segsMaximum number of GSO segments
sk_gso_max_sizeMaximum GSO segment size to build
sk_allocationallocation mode
sk_txhashcomputed flow hash for use on transmit
sk_sndbufsize of send buffer in bytes
sk_pacing_shiftscaling factor for TCP Small Queues
sk_use_task_fragallow
sk_page_frag()to use current->task_frag.Sockets that can be used under memory reclaim shouldset this to false.sk_gso_disabledif set, NETIF_F_GSO_MASK is forbidden.
sk_kern_sockTrue if sock is using kernel lock classes
sk_no_check_txSO_NO_CHECKsetting, set checksum in TX packetssk_no_check_rxallow zero checksum in RX packets
sk_shutdownmask of
SEND_SHUTDOWNand/orRCV_SHUTDOWNsk_lingertimeSO_LINGERl_linger settingsk_prot_creatorsk_prot of original sock creator (see ipv6_setsockopt,IPV6_ADDRFORM for instance)
sk_callback_lockused with the callbacks in the end of this struct
sk_ack_backlogcurrent listen backlog
sk_max_ack_backloglisten backlog set in
listen()sk_inoinode number (zero if orphaned)
sk_peer_locklock protectingsk_peer_pid andsk_peer_cred
sk_bind_phcSO_TIMESTAMPING bind PHC index of PTP virtual clockfor timestamping
sk_peer_pidstructpidfor this socket’s peersk_peer_credSO_PEERCREDsettingsk_stamptime stamp of last packet received
sk_stamp_seqlock for accessing sk_stamp on 32 bit architectures only
sk_disconnectsnumber of disconnect operations performed on this sock
{unnamed_union}anonymous
sk_txrehashenable TX hash rethink
sk_scm_recv_flagsall flags used by
scm_recv(){unnamed_struct}anonymous
sk_scm_credentialsflagged by SO_PASSCRED to recv SCM_CREDENTIALS
sk_scm_securityflagged by SO_PASSSEC to recv SCM_SECURITY
sk_scm_pidfdflagged by SO_PASSPIDFD to recv SCM_PIDFD
sk_scm_rightsflagged by SO_PASSRIGHTS to recv SCM_RIGHTS
sk_scm_unusedunused flags for
scm_recv()sk_clockidclockid used by time-based scheduling (SO_TXTIME)
sk_txtime_deadline_modeset deadline mode for SO_TXTIME
sk_txtime_report_errorsset report errors mode for SO_TXTIME
sk_txtime_unusedunused txtime flags
sk_bpf_cb_flagsused in
bpf_setsockopt()sk_user_dataRPC layer private data. Write-protected bysk_callback_lock.
sk_securityused by security modules
sk_cgrp_datacgroup data for this cgroup
sk_state_changecallback to indicate change in the state of the sock
sk_write_spacecallback to indicate there is buffer sending space available
sk_error_reportcallback to indicate errors (e.g.
MSG_ERRQUEUE)sk_backlog_rcvcallback to process the backlog
sk_destructcalled at sock freeing time, i.e. when all refcnt == 0
sk_reuseport_cbreuseport group container
sk_bpf_storageptr to cache and control for bpf_sk_storage
sk_drop_countersoptional pointer to numa_drop_counters
sk_rcuused during RCU grace period
ns_trackertracker for netns reference
sk_user_fragsxarray of pages the user is holding a reference on.
sk_ownerreference to the real owner of the socket that calls
sock_lock_init_class_and_name().
Parameters
conststructsock*sksocket
- void*__locked_read_sk_user_data_with_flags(conststructsock*sk,uintptr_tflags)¶
return the pointer only if all of the argument flags have been set in sk_user_data. Otherwise return NULL
Parameters
conststructsock*sksocket
uintptr_tflagsflag bits
Description
The caller must be holding sk->sk_callback_lock.
- void*__rcu_dereference_sk_user_data_with_flags(conststructsock*sk,uintptr_tflags)¶
return the pointer only if all of the argument flags have been set in sk_user_data. Otherwise return NULL
Parameters
conststructsock*sksocket
uintptr_tflagsflag bits
- sk_for_each_entry_offset_rcu¶
sk_for_each_entry_offset_rcu(tpos,pos,head,offset)
iterate over a list at a given
structoffset
Parameters
tposthe type * to use as a loop cursor.
posthe
structhlist_nodeto use as a loop cursor.headthe head for your list.
offsetoffset of hlist_node within the struct.
- SOCK_CONNECT_BIND¶
SOCK_CONNECT_BIND
sock->sk_userlocksflag for auto-bind atconnect()time
Parameters
structsock*sksocket
Description
This version should be used for very small sections, where the process won’t block.
return false if fast path is taken:
sk_lock.slock locked, owned = 0, BH disabled
return true if slow path is taken:
sk_lock.slock unlocked, owned = 1, BH enabled
Parameters
structsock*sksocket
boolslowslow mode
Description
fast unlock socket for user context. If slow mode is on, we call regular release_sock()
Parameters
conststructsock*sksocket
Return
sk_wmem_alloc minus initial offset of one
Parameters
conststructsock*sksocket
Return
sk_rmem_alloc
Parameters
conststructsock*sksocket
Return
true if socket has write or read allocations
- boolskwq_has_sleeper(structsocket_wq*wq)¶
check if there are any waiting processes
Parameters
structsocket_wq*wqstructsocket_wq
Return
true if socket_wq has waiting processes
Description
The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memorybarrier call. They were added due to the race found within the tcp code.
Consider following tcp code paths:
CPU1                    CPU2
sys_select              receive packet
...                     ...
__add_wait_queue        update tp->rcv_nxt
...                     ...
tp->rcv_nxt check       sock_def_readable
...                     {
schedule                    rcu_read_lock();
                            wq = rcu_dereference(sk->sk_wq);
                            if (wq && waitqueue_active(&wq->wait))
                                wake_up_interruptible(&wq->wait)
                            ...
                        }
The race for tcp fires when the __add_wait_queue changes done by CPU1 stay in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 could then end up calling schedule and sleep forever if there is no more data on the socket.
Parameters
structfile*filpfile
structsocket*socksocket to wait on
poll_table*ppoll_table
Description
See the comments in the skwq_has_sleeper function.
Parameters
structsock*sksocket
Description
Use the per task page_frag instead of the per socket one foroptimization when we know that we’re in process context and owneverything that’s associated withcurrent.
Both direct reclaim and page faults can nest inside othersocket operations and end up recursing intosk_page_frag()while it’s already in use: explicitly avoid task page_fragwhen users disable sk_use_task_frag.
Return
a per task page_frag if context allows that,otherwise a per socket one.
- void_sock_tx_timestamp(structsock*sk,conststructsockcm_cookie*sockc,__u8*tx_flags,__u32*tskey)¶
checks whether the outgoing packet is to be time stamped
Parameters
structsock*sksocket sending this packet
conststructsockcm_cookie*sockcpointer to socket cmsg cookie to get timestamping info
__u8*tx_flagscompleted with instructions for time stamping
__u32*tskeyfilled in with next sk_tskey (not for TCP, which uses seqno)
Note
callers should take care of initial*tx_flags value (usually 0)
Parameters
structsock*sksocket to eat this skb from
structsk_buff*skbsocket buffer to eat
Description
This routine must be called with interrupts disabled or with the socketlocked so that the sk_buff queue operation is ok.
Parameters
structsocket*socksocket
intflagsfile status flags
constchar*dnameprotocol name
Description
Returns the file bound with sock, implicitly storing it in sock->file. If dname is NULL, sets to “”. On failure sock is released, and an ERR pointer is returned.
This function uses GFP_KERNEL internally.
Parameters
structfile*filefile
Description
On failure returns
NULL.
Parameters
intfdfile handle
int*errpointer to an error code return
Description
The file handle passed in is locked and the socket it is boundto is returned. If an error occurs the err pointer is overwrittenwith a negative errno code and NULL is returned. The function checksfor both invalid handles and passing a handle which is not a socket.
On a success the socket object pointer is returned.
Parameters
voidno arguments
Description
Allocate a new inode and socket object. The two are bound together and initialised. The socket is then returned. If we are out of inodes NULL is returned. This function uses GFP_KERNEL internally.
Parameters
structsocket*socksocket to close
Description
The socket is released from the protocol stack if it has a releasecallback, and the inode is then released if the socket is bound toan inode not a file.
Parameters
structsocket*socksocket
structmsghdr*msgmessage to send
Description
Sendsmsg throughsock, passing through LSM.Returns the number of bytes sent, or an error code.
- intkernel_sendmsg(structsocket*sock,structmsghdr*msg,structkvec*vec,size_tnum,size_tsize)¶
send a message throughsock (kernel-space)
Parameters
structsocket*socksocket
structmsghdr*msgmessage header
structkvec*veckernel vec
size_tnumvec array length
size_tsizetotal message data size
Description
Builds the message data withvec and sends it throughsock.Returns the number of bytes sent, or an error code.
Parameters
structsocket*socksocket
structmsghdr*msgmessage to receive
intflagsmessage flags
Description
Receivesmsg fromsock, passing through LSM. Returns the total numberof bytes received, or an error.
- intkernel_recvmsg(structsocket*sock,structmsghdr*msg,structkvec*vec,size_tnum,size_tsize,intflags)¶
Receive a message from a socket (kernel space)
Parameters
structsocket*sockThe socket to receive the message from
structmsghdr*msgReceived message
structkvec*vecInput s/g array for message data
size_tnumSize of input s/g array
size_tsizeNumber of bytes to read
intflagsMessage flags (MSG_DONTWAIT, etc...)
Description
On return the msg structure contains the scatter/gather array passed in thevec argument. The array is modified so that it consists of the unfilledportion of the original array.
The returned value is the total number of bytes received, or an error.
Parameters
intfamilyprotocol family (AF_INET, ...)
inttypecommunication type (SOCK_STREAM, ...)
intprotocolprotocol (0, ...)
structsocket**resnew socket
Description
Creates a new socket and assigns it tores, passing through LSM.The new socket initialization is not complete, see
kernel_accept().Returns 0 or an error. On failureres is set toNULL.This function internally uses GFP_KERNEL.
- int__sock_create(structnet*net,intfamily,inttype,intprotocol,structsocket**res,intkern)¶
creates a socket
Parameters
structnet*netnet namespace
intfamilyprotocol family (AF_INET, ...)
inttypecommunication type (SOCK_STREAM, ...)
intprotocolprotocol (0, ...)
structsocket**resnew socket
intkernboolean for kernel space sockets
Description
Creates a new socket and assigns it tores, passing through LSM.Returns 0 or an error. On failureres is set to
NULL.kern mustbe set to true if the socket resides in kernel space.This function internally uses GFP_KERNEL.
Parameters
intfamilyprotocol family (AF_INET, ...)
inttypecommunication type (SOCK_STREAM, ...)
intprotocolprotocol (0, ...)
structsocket**resnew socket
Description
A wrapper around
__sock_create().Returns 0 or an error. This function internally uses GFP_KERNEL.
- intsock_create_kern(structnet*net,intfamily,inttype,intprotocol,structsocket**res)¶
creates a socket (kernel space)
Parameters
structnet*netnet namespace
intfamilyprotocol family (AF_INET, ...)
inttypecommunication type (SOCK_STREAM, ...)
intprotocolprotocol (0, ...)
structsocket**resnew socket
Description
A wrapper around
__sock_create().Returns 0 or an error. This function internally uses GFP_KERNEL.
- intsock_register(conststructnet_proto_family*ops)¶
add a socket protocol handler
Parameters
conststructnet_proto_family*opsdescription of protocol
Description
This function is called by a protocol handler that wants toadvertise its address family, and have it linked into thesocket interface. The value ops->family corresponds to thesocket system call protocol family.
- voidsock_unregister(intfamily)¶
remove a protocol handler
Parameters
intfamilyprotocol family to remove
Description
This function is called by a protocol handler that wants toremove its address family, and have it unlinked from thenew socket creation.
If protocol handler is a module, then it can use module referencecounts to protect against new references. If protocol handler is nota module then it needs to provide its own protection inthe ops->create routine.
- intkernel_bind(structsocket*sock,structsockaddr_unsized*addr,intaddrlen)¶
bind an address to a socket (kernel space)
Parameters
structsocket*socksocket
structsockaddr_unsized*addraddress
intaddrlenlength of address
Description
Returns 0 or an error.
Parameters
structsocket*socksocket
intbacklogpending connections queue size
Description
Returns 0 or an error.
- intkernel_accept(structsocket*sock,structsocket**newsock,intflags)¶
accept a connection (kernel space)
Parameters
structsocket*socklistening socket
structsocket**newsocknew connected socket
intflagsflags
Description
flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.If it fails,newsock is guaranteed to be
NULL.Returns 0 or an error.
- intkernel_connect(structsocket*sock,structsockaddr_unsized*addr,intaddrlen,intflags)¶
connect a socket (kernel space)
Parameters
structsocket*socksocket
structsockaddr_unsized*addraddress
intaddrlenaddress length
intflagsflags (O_NONBLOCK, ...)
Description
For datagram sockets,addr is the address to which datagrams are sentby default, and the only address from which datagrams are received.For stream sockets, attempts to connect toaddr.Returns 0 or an error code.
- intkernel_getsockname(structsocket*sock,structsockaddr*addr)¶
get the address to which the socket is bound (kernel space)
Parameters
structsocket*socksocket
structsockaddr*addraddress holder
Description
Fills the addr pointer with the address to which the socket is bound. Returns the length of the address in bytes or an error code.
- intkernel_getpeername(structsocket*sock,structsockaddr*addr)¶
get the address which the socket is connected (kernel space)
Parameters
structsocket*socksocket
structsockaddr*addraddress holder
Description
Fills the addr pointer with the address to which the socket is connected. Returns the length of the address in bytes or an error code.
- intkernel_sock_shutdown(structsocket*sock,enumsock_shutdown_cmdhow)¶
shut down part of a full-duplex connection (kernel space)
Parameters
structsocket*socksocket
enumsock_shutdown_cmdhowconnection part
Description
Returns 0 or an error.
Parameters
structsock*sksocket
Description
This routine returns the IP overhead imposed by a socket, i.e. the length of the underlying IP header, depending on whether this is an IPv4 or IPv6 socket and the length from IP options turned on at the socket. Assumes that the caller has a lock on the socket.
- voiddrop_reasons_register_subsys(enumskb_drop_reason_subsyssubsys,conststructdrop_reason_list*list)¶
register another drop reason subsystem
Parameters
enumskb_drop_reason_subsyssubsysthe subsystem to register, must not be the core
conststructdrop_reason_list*listthe list of drop reasons within the subsystem, must point to a statically initialized list
- voiddrop_reasons_unregister_subsys(enumskb_drop_reason_subsyssubsys)¶
unregister a drop reason subsystem
Parameters
enumskb_drop_reason_subsyssubsysthe subsystem to remove, must not be the core
Note
This will synchronize_rcu() to ensure no users when it returns.
- u32napi_skb_cache_get_bulk(void**skbs,u32n)¶
obtain a number of zeroed skb heads from the cache
Parameters
void**skbspointer to an at least n-sized array to fill with skb pointers
u32nnumber of entries to provide
Description
Tries to obtain n sk_buff entries from the NAPI percpu cache and writes the pointers into the provided array skbs. If there are fewer entries available, tries to replenish the cache and bulk-allocates the diff from the MM layer if needed. The heads are being zeroed with either memset() or __GFP_ZERO, so they are ready for {,__}build_skb_around() and don’t have any data buffers attached. Must be called only from the BH context.
Return
number of successfully allocated skbs (n if no actual allocation needed or kmem_cache_alloc_bulk() didn’t fail).
- structsk_buff*build_skb_around(structsk_buff*skb,void*data,unsignedintfrag_size)¶
build a network buffer around provided skb
Parameters
structsk_buff*skbsk_buff provided by caller, must be memset cleared
void*datadata buffer provided by caller
unsignedintfrag_sizesize of data
Parameters
void*datadata buffer provided by caller
unsignedintfrag_sizesize of data
Description
Version of __napi_build_skb() that takes care of skb->head_frag and skb->pfmemalloc when the data is a page or page fragment.
Returns a new sk_buff on success, NULL on allocation failure.
Parameters
unsignedintsizesize to allocate
gfp_tgfp_maskallocation mask
intflagsIf SKB_ALLOC_FCLONE is set, allocate from fclone cache instead of head cache and allocate a cloned (child) skb. If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for allocations in case the data is required for writeback
intnodenuma node to allocate memory on
Description
Allocate a new
sk_buff. The returned buffer has no headroom and a tail room of at least size bytes. The object has a reference count of one. The return is the buffer. On a failure the return is NULL. Buffers may only be allocated from interrupts using a gfp_mask of
GFP_ATOMIC.
- structsk_buff*__netdev_alloc_skb(structnet_device*dev,unsignedintlen,gfp_tgfp_mask)¶
allocate an skbuff for rx on a specific device
Parameters
structnet_device*devnetwork device to receive on
unsignedintlenlength to allocate
gfp_tgfp_maskget_free_pages mask, passed to alloc_skb
Description
Allocate a new
sk_buff and assign it a usage count of one. The buffer has NET_SKB_PAD headroom built in. Users should allocate the headroom they think they need without accounting for the built in space. The built in space is used for optimisations.
NULL is returned if there is no free memory.
- structsk_buff*napi_alloc_skb(structnapi_struct*napi,unsignedintlen)¶
allocate skbuff for rx in a specific NAPI instance
Parameters
structnapi_struct*napinapi instance this buffer was allocated for
unsignedintlenlength to allocate
Description
Allocate a new sk_buff for use in NAPI receive. This buffer will attempt to allocate the head from a special reserved region used only for NAPI Rx allocation. By doing this we can save several CPU cycles by avoiding having to disable and re-enable IRQs.
NULL is returned if there is no free memory.
Parameters
structsk_buff*skbbuffer
Description
Free an sk_buff. Release anything attached to the buffer. Clean the state. This is an internal helper function. Users should always call kfree_skb.
- void__fix_addresssk_skb_reason_drop(structsock*sk,structsk_buff*skb,enumskb_drop_reasonreason)¶
free an sk_buff with special reason
Parameters
structsock*skthe socket to receive skb, or NULL if not applicable
structsk_buff*skbbuffer to free
enumskb_drop_reasonreasonreason why this skb is dropped
Description
Drop a reference to the buffer and free it if the usage count has hit zero. Meanwhile, pass the receiving socket and drop reason to ‘kfree_skb’ tracepoint.
Parameters
structsk_buff*skbbuffer that triggered an error
Description
Report xmit error if a device callback is tracking this skb. skb must be freed afterwards.
Parameters
structsk_buff*skbbuffer to free
Description
Drop a ref to the buffer and free it if the usage count has hit zero. Functions identically to kfree_skb, but kfree_skb assumes that the frame is being dropped after a failure and notes that.
- structsk_buff*alloc_skb_for_msg(structsk_buff*first)¶
allocate sk_buff to wrap frag list forming a msg
Parameters
structsk_buff*firstfirst sk_buff of the msg
Parameters
structsk_buff*dstthe skb to receive the contents
structsk_buff*srcthe skb to supply the contents
Description
This is identical to skb_clone except that the target skb is supplied by the user.
The target skb is returned upon exit.
Parameters
structsk_buff*skbthe skb to modify
gfp_tgfp_maskallocation priority
Description
This must be called on skb with SKBFL_ZEROCOPY_ENABLE. It will copy all frags into kernel and drop the reference to userspace pages.
If this function is called from an interrupt
gfp_mask() must be GFP_ATOMIC. Returns 0 on success or a negative error code on failure to allocate kernel memory to copy to.
Parameters
structsk_buff*skbbuffer to clone
gfp_tgfp_maskallocation priority
Description
Duplicate an
sk_buff. The new one is not owned by a socket. Both copies share the same packet data but not structure. The new buffer has a reference count of 1. If the allocation fails the function returns NULL, otherwise the new buffer is returned. If this function is called from an interrupt
gfp_mask() must be GFP_ATOMIC.
Parameters
conststructsk_buff*skbbuffer to copy
gfp_tgfp_maskallocation priority
Description
Make a copy of both an
sk_buff and its data. This is used when the caller wishes to modify the data and needs a private copy of the data to alter. Returns NULL on failure or the pointer to the buffer on success. The returned buffer has a reference count of 1. As a by-product this function converts a non-linear
sk_buff to a linear one, so that sk_buff becomes completely private and caller is allowed to modify all the data of returned buffer. This means that this function is not recommended for use in circumstances when only header is going to be modified. Use pskb_copy() instead.
- structsk_buff*__pskb_copy_fclone(structsk_buff*skb,intheadroom,gfp_tgfp_mask,boolfclone)¶
create copy of an sk_buff with private head.
Parameters
structsk_buff*skbbuffer to copy
intheadroomheadroom of new skb
gfp_tgfp_maskallocation priority
boolfcloneif true allocate the copy of the skb from the fclone cache instead of the head cache; it is recommended to set this to true for the cases where the copy will likely be cloned
Description
Make a copy of both an
sk_buff and part of its data, located in header. Fragmented data remain shared. This is used when the caller wishes to modify only header of sk_buff and needs private copy of the header to alter. Returns NULL on failure or the pointer to the buffer on success. The returned buffer has a reference count of 1.
Parameters
structsk_buff*skbbuffer to reallocate
intnheadroom to add at head
intntailroom to add at tail
gfp_tgfp_maskallocation priority
Description
Expands (or creates identical copy, if nhead and ntail are zero) header of skb.
sk_buff itself is not changed. sk_buff MUST have reference count of 1. Returns zero in the case of success or error, if expansion failed. In the last case, sk_buff is not changed. All the pointers pointing into skb header may change and must be reloaded after call to this function.
Note
- If you skb_push() the start of the buffer after reallocating the
header, call
skb_postpush_data_move() first to move the metadata out of the way before writing to sk_buff->data.
Parameters
structsk_buff*skbbuffer to reallocate
unsignedintheadroomneeded headroom
Description
Unlike skb_realloc_headroom, this one does not allocate a new skb if possible; copies skb->sk to new skb as needed and frees original skb in case of failures.
It expects increased headroom and generates a warning otherwise.
- structsk_buff*skb_copy_expand(conststructsk_buff*skb,intnewheadroom,intnewtailroom,gfp_tgfp_mask)¶
copy and expand sk_buff
Parameters
conststructsk_buff*skbbuffer to copy
intnewheadroomnew free bytes at head
intnewtailroomnew free bytes at tail
gfp_tgfp_maskallocation priority
Description
Make a copy of both an
sk_buff and its data and while doing so allocate additional space. This is used when the caller wishes to modify the data and needs a private copy of the data to alter as well as more space for new fields. Returns
NULL on failure or the pointer to the buffer on success. The returned buffer has a reference count of 1. You must pass
GFP_ATOMIC as the allocation priority if this function is called from an interrupt.
Parameters
structsk_buff*skbbuffer to pad
intpadspace to pad
boolfree_on_errorfree buffer on error
Description
Ensure that a buffer is followed by a padding area that is zero filled. Used by network drivers which may DMA or transfer data beyond the buffer end onto the wire.
May return error in out of memory cases. The skb is freed on error if free_on_error is true.
- void*pskb_put(structsk_buff*skb,structsk_buff*tail,intlen)¶
add data to the tail of a potentially fragmented buffer
Parameters
structsk_buff*skbstart of the buffer to use
structsk_buff*tailtail fragment of the buffer to use
intlenamount of data to add
Description
This function extends the used data area of the potentially fragmented buffer. tail must be the last fragment of skb -- or skb itself. If this would exceed the total buffer size the kernel will panic. A pointer to the first byte of the extra data is returned.
Parameters
structsk_buff*skbbuffer to use
unsignedintlenamount of data to add
Description
This function extends the used data area of the buffer. If this would exceed the total buffer size the kernel will panic. A pointer to the first byte of the extra data is returned.
Parameters
structsk_buff*skbbuffer to use
unsignedintlenamount of data to add
Description
This function extends the used data area of the buffer at the buffer start. If this would exceed the total buffer headroom the kernel will panic. A pointer to the first byte of the extra data is returned.
Parameters
structsk_buff*skbbuffer to use
unsignedintlenamount of data to remove
Description
This function removes data from the start of a buffer, returning the memory to the headroom. A pointer to the next data in the buffer is returned. Once the data has been pulled future pushes will overwrite the old data.
- void*skb_pull_data(structsk_buff*skb,size_tlen)¶
remove data from the start of a buffer returning its original position.
Parameters
structsk_buff*skbbuffer to use
size_tlenamount of data to remove
Description
This function removes data from the start of a buffer, returning the memory to the headroom. A pointer to the original data in the buffer is returned after checking if there is enough data to pull. Once the data has been pulled future pushes will overwrite the old data.
Parameters
structsk_buff*skbbuffer to alter
unsignedintlennew length
Description
Cut the length of a buffer down by removing data from the tail. If the buffer is already under the length specified it is not modified. The skb must be linear.
Parameters
structsk_buff*skbbuffer to reallocate
intdeltanumber of bytes to advance tail
Description
The function makes sense only on a fragmented
sk_buff; it expands header moving its tail forward and copying necessary data from fragmented part.
sk_buff MUST have reference count of 1. Returns
NULL (and sk_buff does not change) if pull failed or value of new tail of skb in the case of success. All the pointers pointing into skb header may change and must be reloaded after call to this function.
- intskb_copy_bits(conststructsk_buff*skb,intoffset,void*to,intlen)¶
copy bits from skb to kernel buffer
Parameters
conststructsk_buff*skbsource skb
intoffsetoffset in source
void*todestination buffer
intlennumber of bytes to copy
Description
Copy the specified number of bytes from the source skb to the destination buffer.
- CAUTION ! :
If its prototype is ever changed, check arch/{*}/net/{*}.S files, since it is called from BPF assembly code.
- intskb_store_bits(structsk_buff*skb,intoffset,constvoid*from,intlen)¶
store bits from kernel buffer to skb
Parameters
structsk_buff*skbdestination buffer
intoffsetoffset in destination
constvoid*fromsource buffer
intlennumber of bytes to copy
Description
Copy the specified number of bytes from the source buffer to the destination skb. This function handles all the messy bits of traversing fragment lists and such.
Parameters
structsk_buff*todestination buffer
structsk_buff*fromsource buffer
intlennumber of bytes to copy from source buffer
inthlensize of linear headroom in destination buffer
Description
Copies up to len bytes from from to to by creating references to the frags in the source buffer.
The hlen as calculated by
skb_zerocopy_headlen() specifies the headroom in the to buffer.
Return value:
0: everything is OK
-ENOMEM: couldn’t orphan frags of from due to lack of memory
-EFAULT: skb_copy_bits() found some problem with skb geometry
Parameters
structsk_buff_head*listlist to dequeue from
Description
Remove the head of the list. The list lock is taken so the function may be used safely with other locking list functions. The head item is returned or
NULL if the list is empty.
Parameters
structsk_buff_head*listlist to dequeue from
Description
Remove the tail of the list. The list lock is taken so the function may be used safely with other locking list functions. The tail item is returned or
NULL if the list is empty.
- voidskb_queue_purge_reason(structsk_buff_head*list,enumskb_drop_reasonreason)¶
empty a list
Parameters
structsk_buff_head*listlist to empty
enumskb_drop_reasonreasondrop reason
Description
Delete all buffers on an
sk_buff list. Each buffer is removed from the list and one reference dropped. This function takes the list lock and is atomic with respect to other list locking functions.
Parameters
structsk_buff_head*listlist to use
structsk_buff*newskbuffer to queue
Description
Queue a buffer at the start of the list. This function takes the list lock and can be used safely with other locking
sk_buff functions. A buffer cannot be placed on two lists at the same time.
Parameters
structsk_buff_head*listlist to use
structsk_buff*newskbuffer to queue
Description
Queue a buffer at the tail of the list. This function takes the list lock and can be used safely with other locking
sk_buff functions. A buffer cannot be placed on two lists at the same time.
Parameters
structsk_buff*skbbuffer to remove
structsk_buff_head*listlist to use
Description
Remove a packet from a list. The list locks are taken and this function is atomic with respect to other list locked calls.
You must know what list the SKB is on.
Parameters
structsk_buff*oldbuffer to insert after
structsk_buff*newskbuffer to insert
structsk_buff_head*listlist to use
Description
Place a packet after a given packet in a list. The list locks are taken and this function is atomic with respect to other list locked calls. A buffer cannot be placed on two lists at the same time.
- voidskb_split(structsk_buff*skb,structsk_buff*skb1,constu32len)¶
Split fragmented skb to two parts at length len.
Parameters
structsk_buff*skbthe buffer to split
structsk_buff*skb1the buffer to receive the second part
constu32lennew length for skb
- voidskb_prepare_seq_read(structsk_buff*skb,unsignedintfrom,unsignedintto,structskb_seq_state*st)¶
Prepare a sequential read of skb data
Parameters
structsk_buff*skbthe buffer to read
unsignedintfromlower offset of data to be read
unsignedinttoupper offset of data to be read
structskb_seq_state*ststate variable
Description
Initializes the specified state variable. Must be called before invoking skb_seq_read() for the first time.
- unsignedintskb_seq_read(unsignedintconsumed,constu8**data,structskb_seq_state*st)¶
Sequentially read skb data
Parameters
unsignedintconsumednumber of bytes consumed by the caller so far
constu8**datadestination pointer for data to be returned
structskb_seq_state*ststate variable
Description
Reads a block of skb data at consumed relative to the lower offset specified to skb_prepare_seq_read(). Assigns the head of the data block to data and returns the length of the block or 0 if the end of the skb data or the upper offset has been reached.
The caller is not required to consume all of the data returned, i.e. consumed is typically set to the number of bytes already consumed and the next call to skb_seq_read() will return the remaining part of the block.
- Note 1: The size of each block of data returned can be arbitrary,
this limitation is the cost for zerocopy sequential reads of potentially non linear data.
- Note 2: Fragment lists within fragments are not implemented
at the moment, state->root_skb could be replaced with a stack for this purpose.
- voidskb_abort_seq_read(structskb_seq_state*st)¶
Abort a sequential read of skb data
Parameters
structskb_seq_state*ststate variable
Description
Must be called if skb_seq_read() was not called until it returned 0.
- intskb_copy_seq_read(structskb_seq_state*st,intoffset,void*to,intlen)¶
copy from a skb_seq_state to a buffer
Parameters
structskb_seq_state*stsource skb_seq_state
intoffsetoffset in source
void*todestination buffer
intlennumber of bytes to copy
Description
Copy len bytes from offset bytes into the source st to the destination buffer to. offset should increase (or be unchanged) with each subsequent call to this function. If offset needs to decrease from the previous use, st should be reset first.
Return
0 on success or -EINVAL if the copy ended early
- unsignedintskb_find_text(structsk_buff*skb,unsignedintfrom,unsignedintto,structts_config*config)¶
Find a text pattern in skb data
Parameters
structsk_buff*skbthe buffer to look in
unsignedintfromsearch offset
unsignedinttosearch limit
structts_config*configtextsearch configuration
Description
Finds a pattern in the skb data according to the specified textsearch configuration. Use textsearch_next() to retrieve subsequent occurrences of the pattern. Returns the offset to the first occurrence or UINT_MAX if no match was found.
Parameters
structsk_buff*skbbuffer to update
unsignedintlenlength of data pulled
Description
This function performs an skb_pull on the packet and updates the CHECKSUM_COMPLETE checksum. It should be used on receive path processing instead of skb_pull unless you know that the checksum difference is zero (e.g., a valid IP header) or you are setting ip_summed to CHECKSUM_NONE.
- structsk_buff*skb_segment(structsk_buff*head_skb,netdev_features_tfeatures)¶
Perform protocol segmentation on skb.
Parameters
structsk_buff*head_skbbuffer to segment
netdev_features_tfeaturesfeatures for the output path (see dev->features)
Description
This function performs segmentation on the given skb. It returns a pointer to the first in a list of new skbs for the segments. In case of error it returns ERR_PTR(err).
- intskb_to_sgvec(structsk_buff*skb,structscatterlist*sg,intoffset,intlen)¶
Fill a scatter-gather list from a socket buffer
Parameters
structsk_buff*skbSocket buffer containing the buffers to be mapped
structscatterlist*sgThe scatter-gather list to map into
intoffsetThe offset into the buffer’s contents to start mapping
intlenLength of buffer space to be mapped
Description
Fill the specified scatter-gather list with mappings/pointers into a region of the buffer space attached to a socket buffer. Returns either the number of scatterlist items used, or -EMSGSIZE if the contents could not fit.
- intskb_cow_data(structsk_buff*skb,inttailbits,structsk_buff**trailer)¶
Check that a socket buffer’s data buffers are writable
Parameters
structsk_buff*skbThe socket buffer to check.
inttailbitsAmount of trailing space to be added
structsk_buff**trailerReturned pointer to the skb where the tailbits space begins
Description
Make sure that the data buffers attached to a socket buffer are writable. If they are not, private copies are made of the data buffers and the socket buffer is set to use these instead.
If tailbits is given, make sure that there is space to write tailbits bytes of data beyond current end of socket buffer. trailer will be set to point to the skb in which this space begins.
The number of scatterlist elements required to completely map the COW’d and extended socket buffer will be returned.
Parameters
structsk_buff*skbthe skb to clone
Description
This function creates a clone of a buffer that holds a reference on sk_refcnt. Buffers created via this function are meant to be returned using sock_queue_err_skb, or freed via kfree_skb.
When passing buffers allocated with this function to sock_queue_err_skb it is necessary to wrap the call with sock_hold/sock_put in order to prevent the socket from being released prior to being enqueued on the sk_error_queue.
- boolskb_partial_csum_set(structsk_buff*skb,u16start,u16off)¶
set up and verify partial csum values for packet
Parameters
structsk_buff*skbthe skb to set
u16startthe number of bytes after skb->data to start checksumming.
u16offthe offset from start to place the checksum.
Description
For untrusted partially-checksummed packets, we need to make sure the values for skb->csum_start and skb->csum_offset are valid so we don’t oops.
This function checks and sets those values and skb->ip_summed: if this returns false you should drop the packet.
Parameters
structsk_buff*skbthe skb to set up
boolrecalculateif true the pseudo-header checksum will be recalculated
- structsk_buff*skb_checksum_trimmed(structsk_buff*skb,unsignedinttransport_len,__sum16(*skb_chkf)(structsk_buff*skb))¶
validate checksum of an skb
Parameters
structsk_buff*skbthe skb to check
unsignedinttransport_lenthe data length beyond the network header
__sum16(*skb_chkf)(structsk_buff*skb)checksum function to use
Description
Applies the given checksum function skb_chkf to the provided skb. Returns a checked and maybe trimmed skb. Returns NULL on error.
If the skb has data beyond the given transport length, then a trimmed & cloned skb is checked and returned.
Caller needs to set the skb transport header and free any returned skb if it differs from the provided skb.
- boolskb_try_coalesce(structsk_buff*to,structsk_buff*from,bool*fragstolen,int*delta_truesize)¶
try to merge skb to prior one
Parameters
structsk_buff*toprior buffer
structsk_buff*frombuffer to add
bool*fragstolenpointer to boolean
int*delta_truesizehow much more was allocated than was requested
Parameters
structsk_buff*skbbuffer to clean
boolxnetpacket is crossing netns
Description
skb_scrub_packet can be used after encapsulating or decapsulating a packet into/from a tunnel. Some information has to be cleared during these operations. skb_scrub_packet can also be used to clean a skb before injecting it in another namespace (xnet == true). We have to clear all information in the skb that could impact namespace isolation.
Parameters
structsk_buff*skbSocket buffer to modify
Description
Drop the Ethernet header of skb.
Expects that skb->data points to the mac header and that no VLAN tags are present.
Returns 0 on success, -errno otherwise.
- intskb_eth_push(structsk_buff*skb,constunsignedchar*dst,constunsignedchar*src)¶
Add a new Ethernet header at the head of a packet
Parameters
structsk_buff*skbSocket buffer to modify
constunsignedchar*dstDestination MAC address of the new header
constunsignedchar*srcSource MAC address of the new header
Description
Prepend skb with a new Ethernet header.
Expects that skb->data points to the mac header, which must be empty.
Returns 0 on success, -errno otherwise.
- intskb_mpls_push(structsk_buff*skb,__be32mpls_lse,__be16mpls_proto,intmac_len,boolethernet)¶
push a new MPLS header after mac_len bytes from start of the packet
Parameters
structsk_buff*skbbuffer
__be32mpls_lseMPLS label stack entry to push
__be16mpls_protoethertype of the new MPLS header (expects 0x8847 or 0x8848)
intmac_lenlength of the MAC header
boolethernetflag to indicate if the resulting packet after skb_mpls_push is ethernet
Description
Expects skb->data at mac header.
Returns 0 on success, -errno otherwise.
- intskb_mpls_pop(structsk_buff*skb,__be16next_proto,intmac_len,boolethernet)¶
pop the outermost MPLS header
Parameters
structsk_buff*skbbuffer
__be16next_protoethertype of header after popped MPLS header
intmac_lenlength of the MAC header
boolethernetflag to indicate if the packet is ethernet
Description
Expects skb->data at mac header.
Returns 0 on success, -errno otherwise.
- intskb_mpls_update_lse(structsk_buff*skb,__be32mpls_lse)¶
modify outermost MPLS header and update csum
Parameters
structsk_buff*skbbuffer
__be32mpls_lsenew MPLS label stack entry to update to
Description
Expects skb->data at mac header.
Returns 0 on success, -errno otherwise.
Parameters
structsk_buff*skbbuffer
Description
Expects skb->data at mac header.
Returns 0 on success, -errno otherwise.
- structsk_buff*alloc_skb_with_frags(unsignedlongheader_len,unsignedlongdata_len,intorder,int*errcode,gfp_tgfp_mask)¶
allocate skb with page frags
Parameters
unsignedlongheader_lensize of linear part
unsignedlongdata_lenneeded length in frags
intordermax page order desired.
int*errcodepointer to error code if any
gfp_tgfp_maskallocation mask
Description
This can be used to allocate a paged skb, given a maximal order for frags.
Parameters
structsk_buff*skbbuffer
Description
Can be used to save memory before skb is added to a busy queue. If packet has bytes in frags and enough tail room in skb->head, pull all of them, so that we can free the frags right now and adjust truesize.
Notes
We do not reallocate skb->head thus can not fail. Caller must re-evaluate skb->truesize if needed.
- void*__skb_ext_set(structsk_buff*skb,enumskb_ext_idid,structskb_ext*ext)¶
attach the specified extension storage to this skb
Parameters
structsk_buff*skbbuffer
enumskb_ext_ididextension id
structskb_ext*extextension storage previously allocated via
__skb_ext_alloc()
Description
Existing extensions, if any, are cleared.
Returns the pointer to the extension.
- void*skb_ext_add(structsk_buff*skb,enumskb_ext_idid)¶
allocate space for given extension, COW if needed
Parameters
structsk_buff*skbbuffer
enumskb_ext_ididextension to allocate space for
Description
Allocates enough space for the given extension. If the extension is already present, a pointer to that extension is returned.
If the skb was cloned, COW applies and the returned memory can be modified without changing the extension space of cloned buffers.
Returns pointer to the extension or NULL on allocation failure.
- ssize_tskb_splice_from_iter(structsk_buff*skb,structiov_iter*iter,ssize_tmaxsize)¶
Splice (or copy) pages to skbuff
Parameters
structsk_buff*skbThe buffer to add pages to
structiov_iter*iterIterator representing the pages to be added
ssize_tmaxsizeMaximum amount of pages to be added
Description
This is a common helper function for supporting MSG_SPLICE_PAGES. It extracts pages from an iterator and adds them to the socket buffer if possible, copying them to fragments if not possible (such as if they’re slab pages).
Returns the amount of data spliced/copied or -EMSGSIZE if there’s insufficient space in the buffer to transfer anything.
- boolsk_ns_capable(conststructsock*sk,structuser_namespace*user_ns,intcap)¶
General socket capability test
Parameters
conststructsock*skSocket to use a capability on or through
structuser_namespace*user_nsThe user namespace of the capability to use
intcapThe capability to use
Description
Test to see if the opener of the socket had, when the socket was created, and the current process has, the capability cap in the user namespace user_ns.
Parameters
conststructsock*skSocket to use a capability on or through
intcapThe global capability to use
Description
Test to see if the opener of the socket had, when the socket was created, and the current process has, the capability cap in all user namespaces.
Parameters
conststructsock*skSocket to use a capability on or through
intcapThe capability to use
Description
Test to see if the opener of the socket had, when the socket was created, and the current process has, the capability cap over the network namespace the socket is a member of.
Parameters
structsock*sksocket to set it on
Description
Set SOCK_MEMALLOC on a socket for access to emergency reserves. It’s the responsibility of the admin to adjust min_free_kbytes to meet the requirements.
- structsock*sk_alloc(structnet*net,intfamily,gfp_tpriority,structproto*prot,intkern)¶
All socket objects are allocated here
Parameters
structnet*netthe applicable net namespace
intfamilyprotocol family
gfp_tpriorityfor allocation (
GFP_KERNEL, GFP_ATOMIC, etc)
structproto*protstruct proto associated with this new sock instance
intkernis this to be a kernel socket?
Parameters
conststructsock*skthe socket to clone
constgfp_tpriorityfor allocation (
GFP_KERNEL, GFP_ATOMIC, etc)
boollockif true, lock the cloned sk
Description
If lock is true, the clone is locked by bh_lock_sock(), and caller must unlock socket even in error path by bh_unlock_sock().
- boolskb_page_frag_refill(unsignedintsz,structpage_frag*pfrag,gfp_tgfp)¶
check that a page_frag contains enough room
Parameters
unsignedintszminimum size of the fragment we want to get
structpage_frag*pfragpointer to page_frag
gfp_tgfppriority for memory allocation
Note
While this allocator tries to use high order pages, there is no guarantee that allocations succeed. Therefore, sz MUST be less than or equal to PAGE_SIZE.
- intsk_wait_data(structsock*sk,long*timeo,conststructsk_buff*skb)¶
wait for data to arrive at sk_receive_queue
Parameters
structsock*sksock to wait on
long*timeofor how long
conststructsk_buff*skblast skb seen on sk_receive_queue
Description
Now socket state including sk->sk_err is changed only under lock, hence we may omit checks after joining wait queue. We check receive queue before schedule() only as optimization; it is very likely that release_sock() added new data.
Parameters
structsock*sksocket
intsizememory size to allocate
intkindallocation type
Description
If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means rmem allocation. This function assumes that protocols which have memory_pressure use sk_wmem_queued as write buffer accounting.
Parameters
structsock*sksocket
intamountnumber of bytes (rounded down to a PAGE_SIZE multiple)
- structsk_buff*__skb_try_recv_datagram(structsock*sk,structsk_buff_head*queue,unsignedintflags,int*off,int*err,structsk_buff**last)¶
Receive a datagram skbuff
Parameters
structsock*sksocket
structsk_buff_head*queuesocket queue from which to receive
unsignedintflagsMSG_ flags
int*offan offset in bytes to peek skb from. Returns an offset within an skb where data actually starts
int*errerror code returned
struct sk_buff **last: set to last peeked message to inform the wait function what to look for when peeking
Description
Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible races. This replaces identical code in packet, raw and udp, as well as the IPX AX.25 and Appletalk. It also finally fixes the long standing peek and read race for datagram sockets. If you alter this routine remember it must be re-entrant.
This function will lock the socket if a skb is returned, so the caller needs to unlock the socket in that case (usually by calling skb_free_datagram). Returns NULL with err set to -EAGAIN if no data was available or to some other value if an error was detected.
It does not lock socket since today. This function is
free of race conditions. This measure should/can improve
significantly datagram socket latencies at high loads,
when data copying to user space takes lots of time.
(BTW I’ve just killed the last cli() in IP/IPv6/core/netlink/packet.
Great win.)
--ANK (980729)
The order of the tests when we find no data waiting is specified quite explicitly by POSIX 1003.1g; don’t change it without having the standard around, please.
- intskb_kill_datagram(structsock*sk,structsk_buff*skb,unsignedintflags)¶
Free a datagram skbuff forcibly
Parameters
structsock*sksocket
structsk_buff*skbdatagram skbuff
unsignedintflagsMSG_ flags
Description
This function frees a datagram skbuff that was received by skb_recv_datagram. The flags argument must match the one used for skb_recv_datagram.
If the MSG_PEEK flag is set, and the packet is still on the receive queue of the socket, it will be taken off the queue before it is freed.
This function currently only disables BH when acquiring the sk_receive_queue lock. Therefore it must not be used in a context where that lock is acquired in an IRQ context.
It returns 0 if the packet was removed by us.
- intskb_copy_and_crc32c_datagram_iter(conststructsk_buff*skb,intoffset,structiov_iter*to,intlen,u32*crcp)¶
Copy datagram to an iovec iterator and update a CRC32C value.
Parameters
conststructsk_buff*skbbuffer to copy
intoffsetoffset in the buffer to start copying from
structiov_iter*toiovec iterator to copy to
intlenamount of data to copy from buffer to iovec
u32*crcppointer to CRC32C value to update
Return
0 on success, -EFAULT if there was a fault during copy.
- intskb_copy_datagram_iter(conststructsk_buff*skb,intoffset,structiov_iter*to,intlen)¶
Copy a datagram to an iovec iterator.
Parameters
conststructsk_buff*skbbuffer to copy
intoffsetoffset in the buffer to start copying from
structiov_iter*toiovec iterator to copy to
intlenamount of data to copy from buffer to iovec
- intskb_copy_datagram_from_iter(structsk_buff*skb,intoffset,structiov_iter*from,intlen)¶
Copy a datagram from an iov_iter.
Parameters
structsk_buff*skbbuffer to copy
intoffsetoffset in the buffer to start copying to
structiov_iter*fromthe copy source
intlenamount of data to copy to buffer from iovec
Description
Returns 0 or -EFAULT.
- intzerocopy_sg_from_iter(structsk_buff*skb,structiov_iter*from)¶
Build a zerocopy datagram from an iov_iter
Parameters
structsk_buff*skbbuffer to copy
structiov_iter*fromthe source to copy from
Description
The function will first copy up to headlen, and then pin the userspace pages and build frags through them.
Returns 0, -EFAULT or -EMSGSIZE.
- intskb_copy_and_csum_datagram_msg(structsk_buff*skb,inthlen,structmsghdr*msg)¶
Copy and checksum skb to user iovec.
Parameters
structsk_buff*skbskbuff
inthlenhardware length
structmsghdr*msgdestination
Description
Caller _must_ check that skb will fit to this iovec.
Return
0 - success.
-EINVAL - checksum failure.
-EFAULT - fault during copy.
- __poll_tdatagram_poll_queue(structfile*file,structsocket*sock,poll_table*wait,structsk_buff_head*rcv_queue)¶
same as datagram_poll, but on a specific receive queue
Parameters
structfile*filefile struct
structsocket*socksocket
poll_table*waitpoll table
structsk_buff_head*rcv_queuereceive queue to poll
Description
Performs polling on the given receive queue, handling shutdown, error, and connection state. This is useful for protocols that deliver userspace-bound packets through a custom queue instead of sk->sk_receive_queue.
Return
poll bitmask indicating the socket’s current state
Parameters
structfile*filefile struct
structsocket*socksocket
poll_table*waitpoll table
Description
Datagram poll: Again totally generic. This also handles sequenced packet sockets providing the socket receive queue is only ever holding data ready to receive.
Note
When you don’t use this routine for this protocol, and you use a different write policy from sock_writeable(), then please supply your own write_space callback.
Return
poll bitmask indicating the socket’s current state
- intsk_stream_wait_connect(structsock*sk,long*timeo_p)¶
Wait for a socket to get into the connected state
Parameters
structsock*sksock to wait on
long*timeo_pfor how long to wait
Description
Must be called with the socket locked.
Parameters
structsock*sksocket to wait for memory
long*timeo_pfor how long
Socket Filter¶
- intsk_filter_trim_cap(structsock*sk,structsk_buff*skb,unsignedintcap,enumskb_drop_reason*reason)¶
run a packet through a socket filter
Parameters
struct sock *sk: sock associated with sk_buff
struct sk_buff *skb: buffer to filter
unsignedintcaplimit on how short the eBPF program may trim the packet
enumskb_drop_reason*reasonrecord drop reason on errors (negative return value)
Description
Run the eBPF program and then cut skb->data to correct size returned by the program. If pkt_len is 0 we toss packet. If skb->len is smaller than pkt_len we keep whole skb->data. This is the socket level wrapper to bpf_prog_run. It returns 0 if the packet should be accepted or -EPERM if the packet should be tossed.
- intbpf_prog_create(structbpf_prog**pfp,structsock_fprog_kern*fprog)¶
create an unattached filter
Parameters
structbpf_prog**pfpthe unattached filter that is created
structsock_fprog_kern*fprogthe filter program
Description
Create a filter independent of any socket. We first run some sanity checks on it to make sure it does not explode on us later. If an error occurs or there is insufficient memory for the filter, a negative errno code is returned. On success the return is zero.
- intbpf_prog_create_from_user(structbpf_prog**pfp,structsock_fprog*fprog,bpf_aux_classic_check_ttrans,boolsave_orig)¶
create an unattached filter from user buffer
Parameters
structbpf_prog**pfpthe unattached filter that is created
structsock_fprog*fprogthe filter program
bpf_aux_classic_check_ttranspost-classic verifier transformation handler
boolsave_origsave classic BPF program
Description
This function effectively does the same as bpf_prog_create(), only that it builds up its insns buffer from user space provided buffer. It also allows for passing a bpf_aux_classic_check_t handler.
Parameters
structsock_fprog*fprogthe filter program
structsock*skthe socket to use
Description
Attach the user’s filter code. We first run some sanity checks on it to make sure it does not explode on us later. If an error occurs or there is insufficient memory for the filter, a negative errno code is returned. On success the return is zero.
Generic Network Statistics¶
- structgnet_stats_basic¶
byte/packet throughput statistics
Definition:
struct gnet_stats_basic { __u64 bytes; __u32 packets;};Members
bytesnumber of seen bytes
packetsnumber of seen packets
- structgnet_stats_rate_est¶
rate estimator
Definition:
struct gnet_stats_rate_est { __u32 bps; __u32 pps;};Members
bpscurrent byte rate
ppscurrent packet rate
- structgnet_stats_rate_est64¶
rate estimator
Definition:
struct gnet_stats_rate_est64 { __u64 bps; __u64 pps;};Members
bpscurrent byte rate
ppscurrent packet rate
- structgnet_stats_queue¶
queuing statistics
Definition:
struct gnet_stats_queue { __u32 qlen; __u32 backlog; __u32 drops; __u32 requeues; __u32 overlimits;};Members
qlenqueue length
backlogbacklog size of queue
dropsnumber of dropped packets
requeuesnumber of requeues
overlimitsnumber of enqueues over the limit
- structgnet_estimator¶
rate estimator configuration
Definition:
struct gnet_estimator { signed char interval; unsigned char ewma_log;};Members
intervalsampling period
ewma_logthe log of measurement window weight
- intgnet_stats_start_copy_compat(structsk_buff*skb,inttype,inttc_stats_type,intxstats_type,spinlock_t*lock,structgnet_dump*d,intpadattr)¶
start dumping procedure in compatibility mode
Parameters
structsk_buff*skbsocket buffer to put statistics TLVs into
inttypeTLV type for top level statistic TLV
int tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
int xstats_type: TLV type for backward compatibility xstats TLV
spinlock_t*lockstatistics lock
structgnet_dump*ddumping handle
intpadattrpadding attribute
Description
Initializes the dumping handle, grabs the statistic lock and appends an empty TLV header to the socket buffer for use as a container for all other statistic TLVs.
The dumping handle is marked to be in backward compatibility mode, telling all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
- intgnet_stats_start_copy(structsk_buff*skb,inttype,spinlock_t*lock,structgnet_dump*d,intpadattr)¶
start dumping procedure
Parameters
structsk_buff*skbsocket buffer to put statistics TLVs into
inttypeTLV type for top level statistic TLV
spinlock_t*lockstatistics lock
structgnet_dump*ddumping handle
intpadattrpadding attribute
Description
Initializes the dumping handle, grabs the statistic lock and appends an empty TLV header to the socket buffer for use as a container for all other statistic TLVs.
Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
- intgnet_stats_copy_basic(structgnet_dump*d,structgnet_stats_basic_sync__percpu*cpu,structgnet_stats_basic_sync*b,boolrunning)¶
copy basic statistics into statistic TLV
Parameters
structgnet_dump*ddumping handle
structgnet_stats_basic_sync__percpu*cpucopy statistic per cpu
structgnet_stats_basic_sync*bbasic statistics
bool running: true if b represents a running qdisc, thus b’s internal values might change during basic reads. Only used if cpu is NULL
Context
task; must not be run from IRQ or BH contexts
Description
Appends the basic statistics to the top level TLV created by gnet_stats_start_copy().
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgnet_stats_copy_basic_hw(structgnet_dump*d,structgnet_stats_basic_sync__percpu*cpu,structgnet_stats_basic_sync*b,boolrunning)¶
copy basic hw statistics into statistic TLV
Parameters
structgnet_dump*ddumping handle
structgnet_stats_basic_sync__percpu*cpucopy statistic per cpu
structgnet_stats_basic_sync*bbasic statistics
bool running: true if b represents a running qdisc, thus b’s internal values might change during basic reads. Only used if cpu is NULL
Context
task; must not be run from IRQ or BH contexts
Description
Appends the basic statistics to the top level TLV created by gnet_stats_start_copy().
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgnet_stats_copy_rate_est(structgnet_dump*d,structnet_rate_estimator__rcu**rate_est)¶
copy rate estimator statistics into statistics TLV
Parameters
structgnet_dump*ddumping handle
structnet_rate_estimator__rcu**rate_estrate estimator
Description
Appends the rate estimator statistics to the top level TLV created by gnet_stats_start_copy().
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgnet_stats_copy_queue(structgnet_dump*d,structgnet_stats_queue__percpu*cpu_q,structgnet_stats_queue*q,__u32qlen)¶
copy queue statistics into statistics TLV
Parameters
structgnet_dump*ddumping handle
structgnet_stats_queue__percpu*cpu_qper cpu queue statistics
structgnet_stats_queue*qqueue statistics
__u32qlenqueue length statistics
Description
Appends the queue statistics to the top level TLV created by gnet_stats_start_copy(), using per cpu queue statistics if they are available.
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgnet_stats_copy_app(structgnet_dump*d,void*st,intlen)¶
copy application specific statistics into statistics TLV
Parameters
structgnet_dump*ddumping handle
void*stapplication specific statistics data
intlenlength of data
Description
Appends the application specific statistics to the top level TLV created by gnet_stats_start_copy() and remembers the data for XSTATS if the dumping handle is in backward compatibility mode.
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgnet_stats_finish_copy(structgnet_dump*d)¶
finish dumping procedure
Parameters
structgnet_dump*ddumping handle
Description
Corrects the length of the top level TLV to include all TLVs added by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs if gnet_stats_start_copy_compat() was used and releases the statistics lock.
Returns 0 on success or -1 with the statistic lock released if the room in the socket buffer was not sufficient.
- intgen_new_estimator(structgnet_stats_basic_sync*bstats,structgnet_stats_basic_sync__percpu*cpu_bstats,structnet_rate_estimator__rcu**rate_est,spinlock_t*lock,boolrunning,structnlattr*opt)¶
create a new rate estimator
Parameters
structgnet_stats_basic_sync*bstatsbasic statistics
structgnet_stats_basic_sync__percpu*cpu_bstatsbstats per cpu
structnet_rate_estimator__rcu**rate_estrate estimator statistics
spinlock_t*locklock for statistics and control path
bool running: true if bstats represents a running qdisc, thus bstats’ internal values might change during basic reads. Only used if cpu_bstats is NULL
structnlattr*optrate estimator configuration TLV
Description
Creates a new rate estimator with bstats as source and rate_est as destination. A new timer with the interval specified in the configuration TLV is created. Upon each interval, the latest statistics will be read from bstats and the estimated rate will be stored in rate_est with the statistics lock grabbed during this period.
Returns 0 on success or a negative error code.
- voidgen_kill_estimator(structnet_rate_estimator__rcu**rate_est)¶
remove a rate estimator
Parameters
structnet_rate_estimator__rcu**rate_estrate estimator
Description
Removes the rate estimator.
- intgen_replace_estimator(structgnet_stats_basic_sync*bstats,structgnet_stats_basic_sync__percpu*cpu_bstats,structnet_rate_estimator__rcu**rate_est,spinlock_t*lock,boolrunning,structnlattr*opt)¶
replace rate estimator configuration
Parameters
structgnet_stats_basic_sync*bstatsbasic statistics
structgnet_stats_basic_sync__percpu*cpu_bstatsbstats per cpu
structnet_rate_estimator__rcu**rate_estrate estimator statistics
spinlock_t*locklock for statistics and control path
bool running: true if bstats represents a running qdisc, thus bstats’ internal values might change during basic reads. Only used if cpu_bstats is NULL
structnlattr*optrate estimator configuration TLV
Description
Replaces the configuration of a rate estimator by calling gen_kill_estimator() and gen_new_estimator().
Returns 0 on success or a negative error code.
- boolgen_estimator_active(structnet_rate_estimator__rcu**rate_est)¶
test if estimator is currently in use
Parameters
structnet_rate_estimator__rcu**rate_estrate estimator
Description
Returns true if estimator is active, and false if not.
SUN RPC subsystem¶
- __be32*xdr_encode_opaque_fixed(__be32*p,constvoid*ptr,unsignedintnbytes)¶
Encode fixed length opaque data
Parameters
__be32*ppointer to current position in XDR buffer.
constvoid*ptrpointer to data to encode (or NULL)
unsignedintnbytessize of data.
Description
Copy the array of data of length nbytes at ptr to the XDR buffer at position p, then align to the next 32-bit boundary by padding with zero bytes (see RFC1832).
Note
if ptr is NULL, only the padding is performed.
Returns the updated current XDR buffer position
- __be32*xdr_encode_opaque(__be32*p,constvoid*ptr,unsignedintnbytes)¶
Encode variable length opaque data
Parameters
__be32*ppointer to current position in XDR buffer.
constvoid*ptrpointer to data to encode (or NULL)
unsignedintnbytessize of data.
Description
Returns the updated current XDR buffer position
- voidxdr_terminate_string(conststructxdr_buf*buf,constu32len)¶
‘\0’-terminate (NUL-terminate) a string residing in an xdr_buf
Parameters
conststructxdr_buf*bufXDR buffer where string resides
constu32lenlength of string, in bytes
- unsignedintxdr_buf_to_bvec(structbio_vec*bvec,unsignedintbvec_size,conststructxdr_buf*xdr)¶
Copy components of an xdr_buf into a bio_vec array
Parameters
structbio_vec*bvecbio_vec array to populate
unsigned int bvec_size: element count of bvec
conststructxdr_buf*xdrxdr_buf to be copied
Description
Returns the number of entries consumed in bvec.
- voidxdr_inline_pages(structxdr_buf*xdr,unsignedintoffset,structpage**pages,unsignedintbase,unsignedintlen)¶
Prepare receive buffer for a large reply
Parameters
structxdr_buf*xdrxdr_buf into which reply will be placed
unsignedintoffsetexpected offset where data payload will start, in bytes
struct page **pages: vector of struct page pointers
unsigned int base: offset in first page where receive should start, in bytes
unsignedintlenexpected size of the upper layer data payload, in bytes
- void_copy_from_pages(char*p,structpage**pages,size_tpgbase,size_tlen)¶
Parameters
char*ppointer to destination
structpage**pagesarray of pages
size_tpgbaseoffset of source data
size_tlenlength
Description
Copies data into an arbitrary memory location from an array of pages. The copy is assumed to be non-overlapping.
- unsignedintxdr_stream_pos(conststructxdr_stream*xdr)¶
Return the current offset from the start of the xdr_stream
Parameters
const struct xdr_stream *xdr: pointer to struct xdr_stream
- unsignedintxdr_page_pos(conststructxdr_stream*xdr)¶
Return the current offset from the start of the xdr pages
Parameters
const struct xdr_stream *xdr: pointer to struct xdr_stream
- voidxdr_init_encode(structxdr_stream*xdr,structxdr_buf*buf,__be32*p,structrpc_rqst*rqst)¶
Initialize a struct xdr_stream for sending data.
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
structxdr_buf*bufpointer to XDR buffer in which to encode data
__be32*pcurrent pointer inside XDR buffer
structrpc_rqst*rqstpointer to controlling rpc_rqst, for debugging
Note
At the moment the RPC client only passes the length of our scratch buffer in the xdr_buf’s header kvec. Previously this meant we needed to call xdr_adjust_iovec() after encoding the data. With the new scheme, the xdr_stream manages the details of the buffer length, and takes care of adjusting the kvec length for us.
- voidxdr_init_encode_pages(structxdr_stream*xdr,structxdr_buf*buf)¶
Initialize an xdr_stream for encoding into pages
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
structxdr_buf*bufpointer to XDR buffer into which to encode data
- void__xdr_commit_encode(structxdr_stream*xdr)¶
Ensure all data is written to buffer
Parameters
structxdr_stream*xdrpointer to xdr_stream
Description
We handle encoding across page boundaries by giving the caller a temporary location to write to, then later copying the data into place; xdr_commit_encode does that copying.
Normally the caller doesn’t need to call this directly, as the following xdr_reserve_space will do it. But an explicit call may be required at the end of encoding, or any other time when the xdr_buf data might be read.
- __be32*xdr_reserve_space(structxdr_stream*xdr,size_tnbytes)¶
Reserve buffer space for sending
Parameters
structxdr_stream*xdrpointer to xdr_stream
size_tnbytesnumber of bytes to reserve
Description
Checks that we have enough buffer space to encode ‘nbytes’ more bytes of data. If so, update the total xdr_buf length, and adjust the length of the current kvec.
The returned pointer is valid only until the next call to xdr_reserve_space() or xdr_commit_encode() on xdr. The current implementation of this API guarantees that space reserved for a four-byte data item remains valid until xdr is destroyed, but that might not always be true in the future.
- intxdr_reserve_space_vec(structxdr_stream*xdr,size_tnbytes)¶
Reserves a large amount of buffer space for sending
Parameters
structxdr_stream*xdrpointer to xdr_stream
size_tnbytesnumber of bytes to reserve
Description
The size argument passed to xdr_reserve_space() is determined based on the number of bytes remaining in the current page to avoid invalidating iov_base pointers when xdr_commit_encode() is called.
- Return values:
0: success
-EMSGSIZE: not enough space is available in xdr
- voidxdr_truncate_encode(structxdr_stream*xdr,size_tlen)¶
truncate an encode buffer
Parameters
structxdr_stream*xdrpointer to xdr_stream
size_tlennew length of buffer
Description
Truncates the xdr stream, so that xdr->buf->len == len, and xdr->p points at offset len from the start of the buffer, and head, tail, and page lengths are adjusted to correspond.
If this means moving xdr->p to a different buffer, we assume that the end pointer should be set to the end of the current page, except in the case of the head buffer when we assume the head buffer’s current length represents the end of the available buffer.
This is not safe to use on a buffer that already has inlined page cache pages (as in a zero-copy server read reply), except for the simple case of truncating from one position in the tail to another.
- voidxdr_truncate_decode(structxdr_stream*xdr,size_tlen)¶
Truncate a decoding stream
Parameters
struct xdr_stream *xdr: pointer to struct xdr_stream
size_t len: Number of bytes to remove
- intxdr_restrict_buflen(structxdr_stream*xdr,intnewbuflen)¶
decrease available buffer space
Parameters
structxdr_stream*xdrpointer to xdr_stream
intnewbuflennew maximum number of bytes available
Description
Adjust our idea of how much space is available in the buffer. If we’ve already used too much space in the buffer, returns -1. If the available space is already smaller than newbuflen, returns 0 and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen and ensures xdr->end is set at most offset newbuflen from the start of the buffer.
- voidxdr_write_pages(structxdr_stream*xdr,structpage**pages,unsignedintbase,unsignedintlen)¶
Insert a list of pages into an XDR buffer for sending
Parameters
structxdr_stream*xdrpointer to xdr_stream
structpage**pagesarray of pages to insert
unsigned int base: starting offset of first data byte in pages
unsigned int len: number of data bytes in pages to insert
Description
After the pages are added, the tail iovec is instantiated pointing to the end of the head buffer, and the stream is set up to encode subsequent items into the tail.
- voidxdr_init_decode(structxdr_stream*xdr,structxdr_buf*buf,__be32*p,structrpc_rqst*rqst)¶
Initialize an xdr_stream for decoding data.
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
structxdr_buf*bufpointer to XDR buffer from which to decode data
__be32*pcurrent pointer inside XDR buffer
structrpc_rqst*rqstpointer to controlling rpc_rqst, for debugging
- voidxdr_init_decode_pages(structxdr_stream*xdr,structxdr_buf*buf,structpage**pages,unsignedintlen)¶
Initialize an xdr_stream for decoding into pages
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
structxdr_buf*bufpointer to XDR buffer from which to decode data
structpage**pageslist of pages to decode into
unsignedintlenlength in bytes of buffer in pages
- voidxdr_finish_decode(structxdr_stream*xdr)¶
Clean up the xdr_stream after decoding data.
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
- __be32*xdr_inline_decode(structxdr_stream*xdr,size_tnbytes)¶
Retrieve XDR data to decode
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
size_tnbytesnumber of bytes of data to decode
Description
Check if the input buffer is long enough to enable us to decode ‘nbytes’ more bytes of data starting at the current position. If so, return the current pointer, then update the current pointer position.
- unsignedintxdr_read_pages(structxdr_stream*xdr,unsignedintlen)¶
align page-based XDR data to current pointer position
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
unsignedintlennumber of bytes of page data
Description
Moves data beyond the current pointer position from the XDR head[] buffer into the page list. Any data that lies beyond current position + len bytes is moved into the XDR tail[]. The xdr_stream current position is then advanced past that data to align to the next XDR object in the tail.
Returns the number of XDR encoded bytes now contained in the pages
- voidxdr_set_pagelen(structxdr_stream*xdr,unsignedintlen)¶
Sets the length of the XDR pages
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
unsignedintlennew length of the XDR page data
Description
Either grows or shrinks the length of the xdr pages by setting pagelen to len bytes. When shrinking, any extra data is moved into buf->tail, whereas when growing, any data beyond the current pointer is moved into the tail.
Returns True if the operation was successful, and False otherwise.
- voidxdr_enter_page(structxdr_stream*xdr,unsignedintlen)¶
decode data from the XDR page
Parameters
structxdr_stream*xdrpointer to xdr_stream struct
unsignedintlennumber of bytes of page data
Description
Moves data beyond the current pointer position from the XDR head[] buffer into the page list. Any data that lies beyond current position + “len” bytes is moved into the XDR tail[]. The current pointer is then repositioned at the beginning of the first XDR page.
- intxdr_buf_subsegment(conststructxdr_buf*buf,structxdr_buf*subbuf,unsignedintbase,unsignedintlen)¶
set subbuf to a portion of buf
Parameters
conststructxdr_buf*bufan xdr buffer
structxdr_buf*subbufthe result buffer
unsignedintbasebeginning of range in bytes
unsignedintlenlength of range in bytes
Description
Sets subbuf to an xdr buffer representing the portion of buf of length len starting at offset base.
buf and subbuf may be pointers to the same struct xdr_buf.
Returns -1 if base or length are out of bounds.
- boolxdr_stream_subsegment(structxdr_stream*xdr,structxdr_buf*subbuf,unsignedintnbytes)¶
set subbuf to a portion of xdr
Parameters
structxdr_stream*xdran xdr_stream set up for decoding
structxdr_buf*subbufthe result buffer
unsigned int nbytes: length of xdr to extract, in bytes
Description
Sets up subbuf to represent a portion of xdr. The portion starts at the current offset in xdr, and extends for a length of nbytes. If this is successful, xdr is advanced to the next XDR data item following that portion.
- Return values:
true: subbuf has been initialized, and xdr has been advanced.
false: a bounds error has occurred
- unsignedintxdr_stream_move_subsegment(structxdr_stream*xdr,unsignedintoffset,unsignedinttarget,unsignedintlength)¶
Move part of a stream to another position
Parameters
structxdr_stream*xdrthe source xdr_stream
unsignedintoffsetthe source offset of the segment
unsignedinttargetthe target offset of the segment
unsignedintlengththe number of bytes to move
Description
Moves length bytes from offset to target in the xdr_stream, overwriting anything in its space. Returns the number of bytes in the segment.
- unsignedintxdr_stream_zero(structxdr_stream*xdr,unsignedintoffset,unsignedintlength)¶
zero out a portion of an xdr_stream
Parameters
structxdr_stream*xdran xdr_stream to zero out
unsignedintoffsetthe starting point in the stream
unsignedintlengththe number of bytes to zero
- voidxdr_buf_trim(structxdr_buf*buf,unsignedintlen)¶
lop at most “len” bytes off the end of “buf”
Parameters
structxdr_buf*bufbuf to be trimmed
unsignedintlennumber of bytes to reduce “buf” by
Description
Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note that it’s possible that we’ll trim less than that amount if the xdr_buf is too small, or if (for instance) it’s all in the head and the parser has already read too far into it.
- ssize_txdr_stream_decode_string_dup(structxdr_stream*xdr,char**str,size_tmaxlen,gfp_tgfp_flags)¶
Decode and duplicate variable length string
Parameters
structxdr_stream*xdrpointer to xdr_stream
char**strlocation to store pointer to string
size_tmaxlenmaximum acceptable string length
gfp_tgfp_flagsGFP mask to use
Description
- Return values:
On success, returns length of NUL-terminated string stored in *str
-EBADMSG on XDR buffer overflow
-EMSGSIZE if the size of the string would exceed maxlen
-ENOMEM on memory allocation failure
- ssize_txdr_stream_decode_opaque_auth(structxdr_stream*xdr,u32*flavor,void**body,unsignedint*body_len)¶
Decode struct opaque_auth (RFC5531 S8.2)
Parameters
structxdr_stream*xdrpointer to xdr_stream
u32*flavorlocation to store decoded flavor
void**bodylocation to store decode body
unsignedint*body_lenlocation to store length of decoded body
Description
- Return values:
On success, returns the number of buffer bytes consumed
-EBADMSG on XDR buffer overflow
-EMSGSIZE if the decoded size of the body field exceeds 400 octets
- ssize_txdr_stream_encode_opaque_auth(structxdr_stream*xdr,u32flavor,void*body,unsignedintbody_len)¶
Encode struct opaque_auth (RFC5531 S8.2)
Parameters
structxdr_stream*xdrpointer to xdr_stream
u32flavorverifier flavor to encode
void*bodycontent of body to encode
unsignedintbody_lenlength of body to encode
Description
- Return values:
On success, returns length in bytes of XDR buffer consumed
-EBADMSG on XDR buffer overflow
-EMSGSIZE if the size of body exceeds 400 octets
- intsvc_reg_xprt_class(structsvc_xprt_class*xcl)¶
Register a server-side RPC transport class
Parameters
structsvc_xprt_class*xclNew transport class to be registered
Description
Returns zero on success; otherwise a negative errno is returned.
- voidsvc_unreg_xprt_class(structsvc_xprt_class*xcl)¶
Unregister a server-side RPC transport class
Parameters
structsvc_xprt_class*xclTransport class to be unregistered
- voidsvc_xprt_deferred_close(structsvc_xprt*xprt)¶
Close a transport
Parameters
structsvc_xprt*xprttransport instance
Description
Used in contexts that need to defer the work of shutting down the transport to an nfsd thread.
- voidsvc_xprt_received(structsvc_xprt*xprt)¶
start next receiver thread
Parameters
structsvc_xprt*xprtcontrolling transport
Description
The caller must hold the XPT_BUSY bit and must not thereafter touch transport data.
Note
XPT_DATA only gets cleared when a read-attempt finds no (or insufficient) data.
- intsvc_xprt_create_from_sa(structsvc_serv*serv,constchar*xprt_name,structnet*net,structsockaddr*sap,intflags,conststructcred*cred)¶
Add a new listener to serv from socket address
Parameters
structsvc_serv*servtarget RPC service
constchar*xprt_nametransport class name
structnet*netnetwork namespace
structsockaddr*sapsocket address pointer
intflagsSVC_SOCK flags
conststructcred*credcredential to bind to this transport
Description
Return local xprt port on success or -EPROTONOSUPPORT on failure
- intsvc_xprt_create(structsvc_serv*serv,constchar*xprt_name,structnet*net,constintfamily,constunsignedshortport,intflags,conststructcred*cred)¶
Add a new listener to serv
Parameters
structsvc_serv*servtarget RPC service
constchar*xprt_nametransport class name
structnet*netnetwork namespace
constintfamilynetwork address family
constunsignedshortportlistener port
intflagsSVC_SOCK flags
conststructcred*credcredential to bind to this transport
Description
Return local xprt port on success or -EPROTONOSUPPORT on failure
- char*svc_print_addr(structsvc_rqst*rqstp,char*buf,size_tlen)¶
Format rq_addr field for printing
Parameters
struct svc_rqst *rqstp: svc_rqst struct containing address to print
char *buf: target buffer for formatted address
size_tlenlength of target buffer
- voidsvc_xprt_enqueue(structsvc_xprt*xprt)¶
Queue a transport on an idle nfsd thread
Parameters
structsvc_xprt*xprttransport with data pending
- voidsvc_reserve(structsvc_rqst*rqstp,intspace)¶
change the space reserved for the reply to a request.
Parameters
structsvc_rqst*rqstpThe request in question
intspacenew max space to reserve
Description
Each request reserves some space on the output queue of the transport to make sure the reply fits. This function reduces that reserved space to be the amount of space used already, plus space.
- voidsvc_wake_up(structsvc_serv*serv)¶
Wake up a service thread for non-transport work
Parameters
structsvc_serv*servRPC service
Description
Some svc_serv’s will have occasional work to do, even when a xprt is not waiting to be serviced. This function is there to “kick” a task in one of those services so that it can wake up and do that work. Note that we only bother with pool 0 as we don’t need to wake up more than one thread for this purpose.
- voidsvc_recv(structsvc_rqst*rqstp)¶
Receive and process the next request on any transport
Parameters
structsvc_rqst*rqstpan idle RPC service thread
Description
This code is carefully organised not to touch any cachelines in the shared svc_serv structure, only cachelines in the local svc_pool.
- voidsvc_xprt_close(structsvc_xprt*xprt)¶
Close a client connection
Parameters
structsvc_xprt*xprttransport to disconnect
- voidsvc_xprt_destroy_all(structsvc_serv*serv,structnet*net,boolunregister)¶
Destroy transports associated with serv
Parameters
structsvc_serv*servRPC service to be shut down
structnet*nettarget network namespace
boolunregistertrue if it is OK to unregister the destroyed xprts
Description
Server threads may still be running (especially in the case where the service is still running in other network namespaces).
So we shut down sockets the same way we would on a running server, by setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do the close. In the case there are no such other threads running, svc_clean_up_xprts() does a simple version of a server’s main event loop, and in the case where there are other threads, we may need to wait a little while and then check again to see if they’re done.
- structsvc_xprt*svc_find_listener(structsvc_serv*serv,constchar*xcl_name,structnet*net,conststructsockaddr*sa)¶
find an RPC transport instance
Parameters
structsvc_serv*servpointer to svc_serv to search
constchar*xcl_nameC string containing transport’s class name
structnet*netowner net pointer
conststructsockaddr*sasockaddr containing address
Description
Return the transport instance pointer for the endpoint accepting connections/peer traffic from the specified transport class, and matching sockaddr.
- structsvc_xprt*svc_find_xprt(structsvc_serv*serv,constchar*xcl_name,structnet*net,constsa_family_taf,constunsignedshortport)¶
find an RPC transport instance
Parameters
structsvc_serv*servpointer to svc_serv to search
constchar*xcl_nameC string containing transport’s class name
structnet*netowner net pointer
constsa_family_tafAddress family of transport’s local address
constunsignedshortporttransport’s IP port number
Description
Return the transport instance pointer for the endpoint accepting connections/peer traffic from the specified transport class, address family and port.
Specifying 0 for the address family or port is effectively a wild-card, and will result in matching the first transport in the service’s list that has a matching class name.
- intsvc_xprt_names(structsvc_serv*serv,char*buf,constintbuflen)¶
format a buffer with a list of transport names
Parameters
structsvc_serv*servpointer to an RPC service
char*bufpointer to a buffer to be filled in
constintbuflenlength of buffer to be filled in
Description
Fills in buf with a string containing a list of transport names, each name terminated with ‘\n’.
Returns positive length of the filled-in string on success; otherwise a negative errno value is returned if an error occurs.
- intxprt_register_transport(structxprt_class*transport)¶
register a transport implementation
Parameters
structxprt_class*transporttransport to register
Description
If a transport implementation is loaded as a kernel module, it can call this interface to make itself known to the RPC client.
Return
0: transport successfully registered; -EEXIST: transport already registered; -EINVAL: transport module being unloaded
- intxprt_unregister_transport(structxprt_class*transport)¶
unregister a transport implementation
Parameters
structxprt_class*transporttransport to unregister
Return
0: transport successfully unregistered; -ENOENT: transport never registered
- intxprt_find_transport_ident(constchar*netid)¶
convert a netid into a transport identifier
Parameters
constchar*netidtransport to load
Return
> 0: transport identifier-ENOENT: transport module not available
- intxprt_reserve_xprt(structrpc_xprt*xprt,structrpc_task*task)¶
serialize write access to transports
Parameters
structrpc_xprt*xprtpointer to the target transport
structrpc_task*tasktask that is requesting access to the transport
Description
This prevents mixing the payload of separate requests, and prevents transport connects from colliding with writes. No congestion control is provided.
- voidxprt_release_xprt(structrpc_xprt*xprt,structrpc_task*task)¶
allow other requests to use a transport
Parameters
structrpc_xprt*xprttransport with other tasks potentially waiting
structrpc_task*tasktask that is releasing access to the transport
Description
Note that “task” can be NULL. No congestion control is provided.
- voidxprt_release_xprt_cong(structrpc_xprt*xprt,structrpc_task*task)¶
allow other requests to use a transport
Parameters
structrpc_xprt*xprttransport with other tasks potentially waiting
structrpc_task*tasktask that is releasing access to the transport
Description
Note that “task” can be NULL. Another task is awoken to use the transport if the transport’s congestion window allows it.
- boolxprt_request_get_cong(structrpc_xprt*xprt,structrpc_rqst*req)¶
Request congestion control credits
Parameters
structrpc_xprt*xprtpointer to transport
structrpc_rqst*reqpointer to RPC request
Description
Useful for transports that require congestion control.
- voidxprt_release_rqst_cong(structrpc_task*task)¶
housekeeping when request is complete
Parameters
structrpc_task*taskRPC request that recently completed
Description
Useful for transports that require congestion control.
- voidxprt_adjust_cwnd(structrpc_xprt*xprt,structrpc_task*task,intresult)¶
adjust transport congestion window
Parameters
structrpc_xprt*xprtpointer to xprt
structrpc_task*taskrecently completed RPC request used to adjust window
intresultresult code of completed RPC request
Description
The transport code maintains an estimate on the maximum number of outstanding RPC requests, using a smoothed version of the congestion avoidance implemented in 4.4BSD. This is basically the Van Jacobson congestion algorithm: If a retransmit occurs, the congestion window is halved; otherwise, it is incremented by 1/cwnd when
a reply is received and
a full number of requests are outstanding and
the congestion window hasn’t been updated recently.
- voidxprt_wake_pending_tasks(structrpc_xprt*xprt,intstatus)¶
wake all tasks on a transport’s pending queue
Parameters
structrpc_xprt*xprttransport with waiting tasks
intstatusresult code to plant in each task before waking it
- voidxprt_wait_for_buffer_space(structrpc_xprt*xprt)¶
wait for transport output buffer to clear
Parameters
structrpc_xprt*xprttransport
Description
Note that we only set the timer for the case of RPC_IS_SOFT(), since we don’t in general want to force a socket disconnection due to an incomplete RPC call transmission.
- boolxprt_write_space(structrpc_xprt*xprt)¶
wake the task waiting for transport output buffer space
Parameters
structrpc_xprt*xprttransport with waiting tasks
Description
Can be called in a soft IRQ context, so xprt_write_space never sleeps.
- voidxprt_disconnect_done(structrpc_xprt*xprt)¶
mark a transport as disconnected
Parameters
structrpc_xprt*xprttransport to flag for disconnect
- voidxprt_force_disconnect(structrpc_xprt*xprt)¶
force a transport to disconnect
Parameters
structrpc_xprt*xprttransport to disconnect
- unsignedlongxprt_reconnect_delay(conststructrpc_xprt*xprt)¶
compute the wait before scheduling a connect
Parameters
conststructrpc_xprt*xprttransport instance
- voidxprt_reconnect_backoff(structrpc_xprt*xprt,unsignedlonginit_to)¶
compute the new re-establish timeout
Parameters
structrpc_xprt*xprttransport instance
unsignedlonginit_toinitial reestablish timeout
- structrpc_rqst*xprt_lookup_rqst(structrpc_xprt*xprt,__be32xid)¶
find an RPC request corresponding to an XID
Parameters
structrpc_xprt*xprttransport on which the original request was transmitted
__be32xidRPC XID of incoming reply
Description
Caller holds xprt->queue_lock.
- voidxprt_pin_rqst(structrpc_rqst*req)¶
Pin a request on the transport receive list
Parameters
structrpc_rqst*reqRequest to pin
Description
Caller must ensure this is atomic with the call to xprt_lookup_rqst(), so should be holding xprt->queue_lock.
- voidxprt_unpin_rqst(structrpc_rqst*req)¶
Unpin a request on the transport receive list
Parameters
structrpc_rqst*reqRequest to unpin
Description
Caller should be holding xprt->queue_lock.
- voidxprt_update_rtt(structrpc_task*task)¶
Update RPC RTT statistics
Parameters
structrpc_task*taskRPC request that recently completed
Description
Caller holds xprt->queue_lock.
- voidxprt_complete_rqst(structrpc_task*task,intcopied)¶
called when reply processing is complete
Parameters
structrpc_task*taskRPC request that recently completed
intcopiedactual number of bytes received from the transport
Description
Caller holds xprt->queue_lock.
- voidxprt_wait_for_reply_request_def(structrpc_task*task)¶
wait for reply
Parameters
structrpc_task*taskpointer to rpc_task
Description
Set a request’s retransmit timeout based on the transport’s default timeout parameters. Used by transports that don’t adjust the retransmit timeout based on round-trip time estimation, and put the task to sleep on the pending queue.
- voidxprt_wait_for_reply_request_rtt(structrpc_task*task)¶
wait for reply using RTT estimator
Parameters
structrpc_task*taskpointer to rpc_task
Description
Set a request’s retransmit timeout using the RTT estimator, and put the task to sleep on the pending queue.
- structrpc_xprt*xprt_get(structrpc_xprt*xprt)¶
return a reference to an RPC transport.
Parameters
structrpc_xprt*xprtpointer to the transport
- voidxprt_put(structrpc_xprt*xprt)¶
release a reference to an RPC transport.
Parameters
structrpc_xprt*xprtpointer to the transport
- voidrpc_wake_up(structrpc_wait_queue*queue)¶
wake up all rpc_tasks
Parameters
structrpc_wait_queue*queuerpc_wait_queue on which the tasks are sleeping
Description
Grabs queue->lock
- voidrpc_wake_up_status(structrpc_wait_queue*queue,intstatus)¶
wake up all rpc_tasks and set their status value.
Parameters
structrpc_wait_queue*queuerpc_wait_queue on which the tasks are sleeping
intstatusstatus value to set
Description
Grabs queue->lock
- structrpc_iostats*rpc_alloc_iostats(structrpc_clnt*clnt)¶
allocate an rpc_iostats structure
Parameters
structrpc_clnt*clntRPC program, version, and xprt
- voidrpc_free_iostats(structrpc_iostats*stats)¶
release an rpc_iostats structure
Parameters
structrpc_iostats*statsdoomed rpc_iostats structure
- voidrpc_count_iostats_metrics(conststructrpc_task*task,structrpc_iostats*op_metrics)¶
tally up per-task stats
Parameters
conststructrpc_task*taskcompleted rpc_task
structrpc_iostats*op_metricsstat structure for OP that will accumulate stats from task
- voidrpc_count_iostats(conststructrpc_task*task,structrpc_iostats*stats)¶
tally up per-task stats
Parameters
conststructrpc_task*taskcompleted rpc_task
structrpc_iostats*statsarray of stat structures
Description
Uses the statidx from task
- intrpc_queue_upcall(structrpc_pipe*pipe,structrpc_pipe_msg*msg)¶
queue an upcall message to userspace
Parameters
structrpc_pipe*pipeupcall pipe on which to queue given message
structrpc_pipe_msg*msgmessage to queue
Description
Call with an inode created by rpc_mkpipe() to queue an upcall. A userspace process may then later read the upcall by performing a read on an open file for this inode. It is up to the caller to initialize the fields of msg (other than msg->list) appropriately.
- intrpc_mkpipe_dentry(structdentry*parent,constchar*name,void*private,structrpc_pipe*pipe)¶
make an rpc_pipefs file for kernel<->userspace communication
Parameters
structdentry*parentdentry of directory to create new “pipe” in
constchar*namename of pipe
void*privateprivate data to associate with the pipe, for the caller’s use
structrpc_pipe*piperpc_pipe containing input parameters
Description
Data is made available for userspace to read by calls to rpc_queue_upcall(). The actual reads will result in calls to ops->upcall, which will be called with the file pointer, message, and userspace buffer to copy to.
Writes can come at any time, and do not necessarily have to be responses to upcalls. They will result in calls to msg->downcall.
The private argument passed here will be available to all these methods from the file pointer, via RPC_I(file_inode(file))->private.
- voidrpc_unlink(structrpc_pipe*pipe)¶
remove a pipe
Parameters
structrpc_pipe*pipethe pipe to be removed
Description
After this call, lookups will no longer find the pipe, and any attempts to read or write using preexisting opens of the pipe will return -EPIPE.
- voidrpc_init_pipe_dir_head(structrpc_pipe_dir_head*pdh)¶
initialise a struct rpc_pipe_dir_head
Parameters
structrpc_pipe_dir_head*pdhpointer to struct rpc_pipe_dir_head
- voidrpc_init_pipe_dir_object(structrpc_pipe_dir_object*pdo,conststructrpc_pipe_dir_object_ops*pdo_ops,void*pdo_data)¶
initialise a struct rpc_pipe_dir_object
Parameters
structrpc_pipe_dir_object*pdopointer to struct rpc_pipe_dir_object
conststructrpc_pipe_dir_object_ops*pdo_opspointer to const struct rpc_pipe_dir_object_ops
void*pdo_datapointer to caller-defined data
- intrpc_add_pipe_dir_object(structnet*net,structrpc_pipe_dir_head*pdh,structrpc_pipe_dir_object*pdo)¶
associate a rpc_pipe_dir_object to a directory
Parameters
structnet*netpointer to struct net
structrpc_pipe_dir_head*pdhpointer to struct rpc_pipe_dir_head
structrpc_pipe_dir_object*pdopointer to struct rpc_pipe_dir_object
- voidrpc_remove_pipe_dir_object(structnet*net,structrpc_pipe_dir_head*pdh,structrpc_pipe_dir_object*pdo)¶
remove a rpc_pipe_dir_object from a directory
Parameters
structnet*netpointer to struct net
structrpc_pipe_dir_head*pdhpointer to struct rpc_pipe_dir_head
structrpc_pipe_dir_object*pdopointer to struct rpc_pipe_dir_object
- structrpc_pipe_dir_object*rpc_find_or_alloc_pipe_dir_object(structnet*net,structrpc_pipe_dir_head*pdh,int(*match)(structrpc_pipe_dir_object*,void*),structrpc_pipe_dir_object*(*alloc)(void*),void*data)¶
Parameters
structnet*netpointer to struct net
structrpc_pipe_dir_head*pdhpointer to struct rpc_pipe_dir_head
int(*match)(structrpc_pipe_dir_object*,void*)match struct rpc_pipe_dir_object to data
structrpc_pipe_dir_object*(*alloc)(void*)allocate a new struct rpc_pipe_dir_object
void*datauser defined data for match() and alloc()
- voidrpcb_getport_async(structrpc_task*task)¶
obtain the port for a given RPC service on a given host
Parameters
structrpc_task*tasktask that is waiting for portmapper request
Description
This one can be called for an ongoing RPC request, and can be used in an async (rpciod) context.
- structrpc_clnt*rpc_create(structrpc_create_args*args)¶
create an RPC client and transport with one call
Parameters
structrpc_create_args*argsrpc_clnt create argument structure
Description
Creates and initializes an RPC transport and an RPC client.
It can ping the server in order to determine if it is up, and to see if it supports this program and version. RPC_CLNT_CREATE_NOPING disables this behavior so asynchronous tasks can also use rpc_create.
- structrpc_clnt*rpc_clone_client(structrpc_clnt*clnt)¶
Clone an RPC client structure
Parameters
structrpc_clnt*clntRPC client whose parameters are copied
Description
Returns a fresh RPC client or an ERR_PTR.
- structrpc_clnt*rpc_clone_client_set_auth(structrpc_clnt*clnt,rpc_authflavor_tflavor)¶
Clone an RPC client structure and set its auth
Parameters
structrpc_clnt*clntRPC client whose parameters are copied
rpc_authflavor_tflavorsecurity flavor for new client
Description
Returns a fresh RPC client or an ERR_PTR.
- intrpc_switch_client_transport(structrpc_clnt*clnt,structxprt_create*args,conststructrpc_timeout*timeout)¶
switch the RPC transport on the fly
Parameters
structrpc_clnt*clntpointer to a struct rpc_clnt
structxprt_create*argspointer to the new transport arguments
conststructrpc_timeout*timeoutpointer to the new timeout parameters
Description
This function allows the caller to switch the RPC transport for the rpc_clnt structure ‘clnt’ to allow it to connect to a mirrored NFS server, for instance. It assumes that the caller has ensured that there are no active RPC tasks by using some form of locking.
Returns zero if “clnt” is now using the new xprt. Otherwise a negative errno is returned, and “clnt” continues to use the old xprt.
- intrpc_clnt_iterate_for_each_xprt(structrpc_clnt*clnt,int(*fn)(structrpc_clnt*,structrpc_xprt*,void*),void*data)¶
Apply a function to all transports
Parameters
structrpc_clnt*clntpointer to client
int(*fn)(structrpc_clnt*,structrpc_xprt*,void*)function to apply
void*datavoid pointer to function data
Description
Iterates through the list of RPC transports currently attached to the client and applies the function fn(clnt, xprt, data).
On error, the iteration stops, and the function returns the error value.
- unsignedlongrpc_cancel_tasks(structrpc_clnt*clnt,interror,bool(*fnmatch)(conststructrpc_task*,constvoid*),constvoid*data)¶
try to cancel a set of RPC tasks
Parameters
structrpc_clnt*clntPointer to RPC client
interrorRPC task error value to set
bool(*fnmatch)(conststructrpc_task*,constvoid*)Pointer to selector function
constvoid*dataUser data
Description
Uses fnmatch to define a set of RPC tasks that are to be cancelled. The argument error must be a negative error value.
- structrpc_clnt*rpc_bind_new_program(structrpc_clnt*old,conststructrpc_program*program,u32vers)¶
bind a new RPC program to an existing client
Parameters
structrpc_clnt*oldold rpc_client
conststructrpc_program*programrpc program to set
u32versrpc program version
Description
Clones the rpc client and sets up a new RPC program. This is mainly of use for enabling different RPC programs to share the same transport. The Sun NFSv2/v3 ACL protocol can do this.
- structrpc_task*rpc_run_task(conststructrpc_task_setup*task_setup_data)¶
Allocate a new RPC task, then run rpc_execute against it
Parameters
conststructrpc_task_setup*task_setup_datapointer to task initialisation data
- intrpc_call_sync(structrpc_clnt*clnt,conststructrpc_message*msg,intflags)¶
Perform a synchronous RPC call
Parameters
structrpc_clnt*clntpointer to RPC client
conststructrpc_message*msgRPC call parameters
intflagsRPC call flags
- intrpc_call_async(structrpc_clnt*clnt,conststructrpc_message*msg,intflags,conststructrpc_call_ops*tk_ops,void*data)¶
Perform an asynchronous RPC call
Parameters
structrpc_clnt*clntpointer to RPC client
conststructrpc_message*msgRPC call parameters
intflagsRPC call flags
conststructrpc_call_ops*tk_opsRPC call ops
void*datauser call data
- voidrpc_prepare_reply_pages(structrpc_rqst*req,structpage**pages,unsignedintbase,unsignedintlen,unsignedinthdrsize)¶
Prepare to receive a reply data payload into pages
Parameters
structrpc_rqst*reqRPC request to prepare
structpage**pagesvector of struct page pointers
unsignedintbaseoffset in first page where receive should start, in bytes
unsignedintlenexpected size of the upper layer data payload, in bytes
unsignedinthdrsizeexpected size of upper layer reply header, in XDR words
- size_trpc_peeraddr(structrpc_clnt*clnt,structsockaddr*buf,size_tbufsize)¶
extract remote peer address from clnt’s xprt
Parameters
structrpc_clnt*clntRPC client structure
structsockaddr*buftarget buffer
size_tbufsizelength of target buffer
Description
Returns the number of bytes that are actually in the stored address.
- constchar*rpc_peeraddr2str(structrpc_clnt*clnt,enumrpc_display_format_tformat)¶
return remote peer address in printable format
Parameters
structrpc_clnt*clntRPC client structure
enumrpc_display_format_tformataddress format
Description
NB: the lifetime of the memory referenced by the returned pointer is the same as the rpc_xprt itself. As long as the caller uses this pointer, it must hold the RCU read lock.
- intrpc_localaddr(structrpc_clnt*clnt,structsockaddr*buf,size_tbuflen)¶
discover local endpoint address for an RPC client
Parameters
structrpc_clnt*clntRPC client structure
structsockaddr*buftarget buffer
size_tbuflensize of target buffer, in bytes
Description
Returns zero and fills in “buf” and “buflen” if successful; otherwise, a negative errno is returned.
This works even if the underlying transport is not currently connected, or if the upper layer never previously provided a source address.
The result of this function call is transient: multiple calls in succession may give different results, depending on how local networking configuration changes over time.
- structnet*rpc_net_ns(structrpc_clnt*clnt)¶
Get the network namespace for this RPC client
Parameters
structrpc_clnt*clntRPC client to query
- size_trpc_max_payload(structrpc_clnt*clnt)¶
Get maximum payload size for a transport, in bytes
Parameters
structrpc_clnt*clntRPC client to query
Description
For stream transports, this is one RPC record fragment (see RFC 1831), as we don’t support multi-record requests yet. For datagram transports, this is the size of an IP packet minus the IP, UDP, and RPC header sizes.
- size_trpc_max_bc_payload(structrpc_clnt*clnt)¶
Get maximum backchannel payload size, in bytes
Parameters
structrpc_clnt*clntRPC client to query
- voidrpc_force_rebind(structrpc_clnt*clnt)¶
force transport to check that remote port is unchanged
Parameters
structrpc_clnt*clntclient to rebind
- intrpc_clnt_test_and_add_xprt(structrpc_clnt*clnt,structrpc_xprt_switch*xps,structrpc_xprt*xprt,void*in_max_connect)¶
Test and add a new transport to a rpc_clnt
Parameters
structrpc_clnt*clntpointer to struct rpc_clnt
structrpc_xprt_switch*xpspointer to struct rpc_xprt_switch
structrpc_xprt*xprtpointer to struct rpc_xprt
void*in_max_connectpointer to the max_connect value for the passed in xprt transport
- intrpc_clnt_setup_test_and_add_xprt(structrpc_clnt*clnt,structrpc_xprt_switch*xps,structrpc_xprt*xprt,void*data)¶
Parameters
structrpc_clnt*clntstruct rpc_clnt to get the new transport
structrpc_xprt_switch*xpsthe rpc_xprt_switch to hold the new transport
structrpc_xprt*xprtthe rpc_xprt to test
void*dataa struct rpc_add_xprt_test pointer that holds the test function and test function call data
Description
This is an rpc_clnt_add_xprt setup() function which returns 1 so:
1) caller of the test function must dereference the rpc_xprt_switch and the rpc_xprt.
2) test function must call rpc_xprt_switch_add_xprt, usually in the rpc_call_done routine.
Upon success (return of 1), the test function adds the new transport to the rpc_clnt xprt switch
- intrpc_clnt_add_xprt(structrpc_clnt*clnt,structxprt_create*xprtargs,int(*setup)(structrpc_clnt*,structrpc_xprt_switch*,structrpc_xprt*,void*),void*data)¶
Add a new transport to a rpc_clnt
Parameters
structrpc_clnt*clntpointer to struct rpc_clnt
structxprt_create*xprtargspointer to struct xprt_create
int(*setup)(structrpc_clnt*,structrpc_xprt_switch*,structrpc_xprt*,void*)callback to test and/or set up the connection
void*datapointer to setup function data
Description
Creates a new transport using the parameters set in args and adds it to clnt. If ping is set, then test that connectivity succeeds before adding the new transport.
Network device support¶
Driver Support¶
- voiddev_add_pack(structpacket_type*pt)¶
add packet handler
Parameters
structpacket_type*ptpacket type declaration
Description
Add a protocol handler to the networking stack. The passed packet_type is linked into kernel lists and may not be freed until it has been removed from the kernel lists.
This call does not sleep therefore it can not guarantee all CPU’s that are in middle of receiving packets will see the new packet type (until the next received packet).
- void__dev_remove_pack(structpacket_type*pt)¶
remove packet handler
Parameters
structpacket_type*ptpacket type declaration
Description
Remove a protocol handler that was previously added to the kernel protocol handlers by dev_add_pack(). The passed packet_type is removed from the kernel lists and can be freed or reused once this function returns.
The packet type might still be in use by receivers and must not be freed until after all the CPU’s have gone through a quiescent state.
- voiddev_remove_pack(structpacket_type*pt)¶
remove packet handler
Parameters
structpacket_type*ptpacket type declaration
Description
Remove a protocol handler that was previously added to the kernel protocol handlers by dev_add_pack(). The passed packet_type is removed from the kernel lists and can be freed or reused once this function returns.
This call sleeps to guarantee that no CPU is looking at the packet type after return.
- intdev_get_iflink(conststructnet_device*dev)¶
get ‘iflink’ value of an interface
Parameters
conststructnet_device*devtargeted interface
Description
Indicates the ifindex the interface is linked to. Physical interfaces have the same ‘ifindex’ and ‘iflink’ values.
- intdev_fill_metadata_dst(structnet_device*dev,structsk_buff*skb)¶
Retrieve tunnel egress information.
Parameters
structnet_device*devtargeted interface
structsk_buff*skbThe packet.
Description
For better visibility of tunnel traffic OVS needs to retrieve egress tunnel information for a packet. Following API allows user to get this info.
- structnet_device*__dev_get_by_name(structnet*net,constchar*name)¶
find a device by its name
Parameters
structnet*netthe applicable net namespace
constchar*namename to find
Description
Find an interface by name. Must be called under RTNL semaphore. If the name is found a pointer to the device is returned. If the name is not found then NULL is returned. The reference counters are not incremented so the caller must be careful with locks.
- structnet_device*dev_get_by_name_rcu(structnet*net,constchar*name)¶
find a device by its name
Parameters
structnet*netthe applicable net namespace
constchar*namename to find
Description
Find an interface by name. If the name is found a pointer to the device is returned. If the name is not found then NULL is returned. The reference counters are not incremented so the caller must be careful with locks. The caller must hold RCU lock.
- structnet_device*netdev_get_by_name(structnet*net,constchar*name,netdevice_tracker*tracker,gfp_tgfp)¶
find a device by its name
Parameters
structnet*netthe applicable net namespace
constchar*namename to find
netdevice_tracker*trackertracking object for the acquired reference
gfp_tgfpallocation flags for the tracker
Description
Find an interface by name. This can be called from any context and does its own locking. The returned handle has the usage count incremented and the caller must use netdev_put() to release it when it is no longer needed. NULL is returned if no matching device is found.
- structnet_device*__dev_get_by_index(structnet*net,intifindex)¶
find a device by its ifindex
Parameters
structnet*netthe applicable net namespace
intifindexindex of device
Description
Search for an interface by index. Returns NULL if the device is not found or a pointer to the device. The device has not had its reference counter increased so the caller must be careful about locking. The caller must hold the RTNL semaphore.
- structnet_device*dev_get_by_index_rcu(structnet*net,intifindex)¶
find a device by its ifindex
Parameters
structnet*netthe applicable net namespace
intifindexindex of device
Description
Search for an interface by index. Returns NULL if the device is not found or a pointer to the device. The device has not had its reference counter increased so the caller must be careful about locking. The caller must hold RCU lock.
- structnet_device*netdev_get_by_index(structnet*net,intifindex,netdevice_tracker*tracker,gfp_tgfp)¶
find a device by its ifindex
Parameters
structnet*netthe applicable net namespace
intifindexindex of device
netdevice_tracker*trackertracking object for the acquired reference
gfp_tgfpallocation flags for the tracker
Description
Search for an interface by index. Returns NULL if the device is not found or a pointer to the device. The device returned has had a reference added and the pointer is safe until the user calls netdev_put() to indicate they have finished with it.
- structnet_device*dev_getbyhwaddr_rcu(structnet*net,unsignedshorttype,constchar*ha)¶
find a device by its hardware address
Parameters
structnet*netthe applicable net namespace
unsignedshorttypemedia type of device
constchar*hahardware address
Description
Search for an interface by MAC address. Returns NULL if the device is not found or a pointer to the device. The caller must hold RCU. The returned device has not had its ref count increased and the caller must therefore be careful about locking.
- structnet_device*dev_getbyhwaddr(structnet*net,unsignedshorttype,constchar*ha)¶
find a device by its hardware address
Parameters
structnet*netthe applicable net namespace
unsignedshorttypemedia type of device
constchar*hahardware address
Description
Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold rtnl_lock.
Context
rtnl_lock() must be held.
Return
pointer to the net_device, or NULL if not found
- booldev_valid_name(constchar*name)¶
check if name is okay for network device
Parameters
constchar*namename string
Description
Network device names need to be valid file names to allow sysfs to work. We also disallow any kind of whitespace.
- intdev_alloc_name(structnet_device*dev,constchar*name)¶
allocate a name for a device
Parameters
structnet_device*devdevice
constchar*namename format string
Description
Passed a format string - eg “lt%d” it will try and find a suitable id. It scans list of devices to build up a free map, then chooses the first empty slot. The caller must hold the dev_base or rtnl lock while allocating the name and adding the device in order to avoid duplicates. Limited to bits_per_byte * page size devices (ie 32K on most platforms). Returns the number of the unit assigned or a negative errno code.
- voidnetdev_features_change(structnet_device*dev)¶
device changes features
Parameters
structnet_device*devdevice to cause notification
Description
Called to indicate a device has changed features.
- void__netdev_notify_peers(structnet_device*dev)¶
notify network peers about existence of dev, to be called when rtnl lock is already held.
Parameters
structnet_device*devnetwork device
Description
Generate traffic such that interested network peers are aware of dev, such as by generating a gratuitous ARP. This may be used when a device wants to inform the rest of the network about some sort of reconfiguration such as a failover event or virtual machine migration.
- voidnetdev_notify_peers(structnet_device*dev)¶
notify network peers about existence of dev
Parameters
structnet_device*devnetwork device
Description
Generate traffic such that interested network peers are aware of dev, such as by generating a gratuitous ARP. This may be used when a device wants to inform the rest of the network about some sort of reconfiguration such as a failover event or virtual machine migration.
- intregister_netdevice_notifier(structnotifier_block*nb)¶
register a network notifier block
Parameters
structnotifier_block*nbnotifier
Description
Register a notifier to be called when network device events occur. The notifier passed is linked into the kernel structures and must not be reused until it has been unregistered. A negative errno code is returned on a failure.
When registered all registration and up events are replayed to the new notifier to allow device to have a race free view of the network device list.
- intunregister_netdevice_notifier(structnotifier_block*nb)¶
unregister a network notifier block
Parameters
structnotifier_block*nbnotifier
Description
Unregister a notifier previously registered by register_netdevice_notifier(). The notifier is unlinked from the kernel structures and may then be reused. A negative errno code is returned on a failure.
After unregistering unregister and down device events are synthesized for all devices on the device list to the removed notifier to remove the need for special case cleanup code.
- intregister_netdevice_notifier_net(structnet*net,structnotifier_block*nb)¶
register a per-netns network notifier block
Parameters
structnet*netnetwork namespace
structnotifier_block*nbnotifier
Description
Register a notifier to be called when network device events occur.The notifier passed is linked into the kernel structures and mustnot be reused until it has been unregistered. A negative errno codeis returned on a failure.
When registered all registration and up events are replayedto the new notifier to allow device to have a race freeview of the network device list.
- intunregister_netdevice_notifier_net(structnet*net,structnotifier_block*nb)¶
unregister a per-netns network notifier block
Parameters
structnet*netnetwork namespace
structnotifier_block*nbnotifier
Description
Unregister a notifier previously registered byregister_netdevice_notifier_net(). The notifier is unlinked from thekernel structures and may then be reused. A negative errno codeis returned on a failure.
After unregistering unregister and down device events are synthesizedfor all devices on the device list to the removed notifier to removethe need for special case cleanup code.
- intcall_netdevice_notifiers(unsignedlongval,structnet_device*dev)¶
call all network notifier blocks
Parameters
unsignedlongvalvalue passed unmodified to notifier function
structnet_device*devnet_device pointer passed unmodified to notifier function
Description
Call all network notifier blocks. Parameters and return valueare as for
raw_notifier_call_chain().
- intdev_forward_skb(structnet_device*dev,structsk_buff*skb)¶
loopback an skb to another netif
Parameters
structnet_device*devdestination network device
structsk_buff*skbbuffer to forward
Description
- return values:
NET_RX_SUCCESS (no congestion)NET_RX_DROP (packet was dropped, but freed)
dev_forward_skb can be used for injecting an skb from thestart_xmit function of one device into the receive queueof another device.
The receiving device may be in another namespace, sowe have to clear all information in the skb that couldimpact namespace isolation.
- booldev_nit_active_rcu(conststructnet_device*dev)¶
return true if any network interface taps are in use
Parameters
conststructnet_device*devnetwork device to check for the presence of taps
Description
The caller must hold the RCU lock
- intnetif_set_real_num_rx_queues(structnet_device*dev,unsignedintrxq)¶
set actual number of RX queues used
Parameters
structnet_device*devNetwork device
unsignedintrxqActual number of RX queues
Description
This must be called either with the rtnl_lock held or beforeregistration of the net device. Returns 0 on success, or anegative error code. If called before registration, it alwayssucceeds.
- intnetif_set_real_num_queues(structnet_device*dev,unsignedinttxq,unsignedintrxq)¶
set actual number of RX and TX queues used
Parameters
structnet_device*devNetwork device
unsignedinttxqActual number of TX queues
unsignedintrxqActual number of RX queues
Description
Set the real number of both TX and RX queues.Does nothing if the number of queues is already correct.
- voidnetif_set_tso_max_size(structnet_device*dev,unsignedintsize)¶
set the max size of TSO frames supported
Parameters
structnet_device*devnetdev to update
unsignedintsizemax skb->len of a TSO frame
Description
Set the limit on the size of TSO super-frames the device can handle.Unless explicitly set the stack will assume the value ofGSO_LEGACY_MAX_SIZE.
- voidnetif_set_tso_max_segs(structnet_device*dev,unsignedintsegs)¶
set the max number of segs supported for TSO
Parameters
structnet_device*devnetdev to update
unsignedintsegsmax number of TCP segments
Description
Set the limit on the number of TCP segments the device can generate froma single TSO super-frame.Unless explicitly set the stack will assume the value ofGSO_MAX_SEGS.
- voidnetif_inherit_tso_max(structnet_device*to,conststructnet_device*from)¶
copy all TSO limits from a lower device to an upper
Parameters
structnet_device*tonetdev to update
conststructnet_device*fromnetdev from which to copy the limits
- intnetif_get_num_default_rss_queues(void)¶
default number of RSS queues
Parameters
voidno arguments
Description
Default value is the number of physical cores if there are only 1 or 2, or that number divided by 2 if there are more.
- voidnetif_device_detach(structnet_device*dev)¶
mark device as removed
Parameters
structnet_device*devnetwork device
Description
Mark device as removed from system and therefore no longer available.
- voidnetif_device_attach(structnet_device*dev)¶
mark device as attached
Parameters
structnet_device*devnetwork device
Description
Mark device as attached to the system and restart if needed.
Parameters
structnet*netnetwork namespace this loopback is happening in
structsock*sksk needed to be a netfilter okfn
structsk_buff*skbbuffer to transmit
- int__dev_queue_xmit(structsk_buff*skb,structnet_device*sb_dev)¶
transmit a buffer
Parameters
structsk_buff*skbbuffer to transmit
structnet_device*sb_devsubordinate device used for L2 forwarding offload
Description
Queue a buffer for transmission to a network device. The caller musthave set the device and priority and built the buffer before callingthis function. The function can be called from an interrupt.
When calling this method, interrupts MUST be enabled. This is becausethe BH enable code must have IRQs enabled so that it will not deadlock.
Regardless of the return value, the skb is consumed, so it is currentlydifficult to retry a send to this method. (You can bump the ref countbefore sending to hold a reference for retry if you are careful.)
Return
0 - buffer successfully transmitted
positive qdisc return code - NET_XMIT_DROP etc.
negative errno - other errors
- boolrps_may_expire_flow(structnet_device*dev,u16rxq_index,u32flow_id,u16filter_id)¶
check whether an RFS hardware filter may be removed
Parameters
structnet_device*devDevice on which the filter was set
u16rxq_indexRX queue index
u32flow_idFlow ID passed to ndo_rx_flow_steer()
u16filter_idFilter ID returned by ndo_rx_flow_steer()
Description
Drivers that implementndo_rx_flow_steer() should periodically callthis function for each installed filter and remove the filters forwhich it returnstrue.
Parameters
structsk_buff*skbbuffer to post
Description
This behaves as netif_rx except that it does not disable bottom halves.As a result this function may only be invoked from the interrupt context(either hard or soft interrupt).
Parameters
structsk_buff*skbbuffer to post
Description
This function receives a packet from a device driver and queues it for the upper (protocol) levels to process via the backlog NAPI device. It always succeeds. The buffer may be dropped during processing for congestion control or by the protocol layers. The network buffer is passed via the backlog NAPI device. Modern NIC drivers should use NAPI and GRO. This function can be used from interrupt and from process context. The caller from process context must not disable interrupts before invoking this function.
return values:NET_RX_SUCCESS (no congestion)NET_RX_DROP (packet was dropped)
- boolnetdev_is_rx_handler_busy(structnet_device*dev)¶
check if receive handler is registered
Parameters
structnet_device*devdevice to check
Description
Check if a receive handler is already registered for a given device. Return true if there is one.
The caller must hold the rtnl_mutex.
- intnetdev_rx_handler_register(structnet_device*dev,rx_handler_func_t*rx_handler,void*rx_handler_data)¶
register receive handler
Parameters
structnet_device*devdevice to register a handler for
rx_handler_func_t*rx_handlerreceive handler to register
void*rx_handler_datadata pointer that is used by rx handler
Description
Register a receive handler for a device. This handler will then becalled from __netif_receive_skb. A negative errno code is returnedon a failure.
The caller must hold the rtnl_mutex.
For a general description of rx_handler, see
enumrx_handler_result.
- voidnetdev_rx_handler_unregister(structnet_device*dev)¶
unregister receive handler
Parameters
structnet_device*devdevice to unregister a handler from
Description
Unregister a receive handler from a device.
The caller must hold the rtnl_mutex.
Parameters
structsk_buff*skbbuffer to process
Description
More direct receive version of
netif_receive_skb(). It shouldonly be used by callers that have a need to skip RPS and Generic XDP.Caller must also take care of handling if(page_is_)pfmemalloc.This function may only be called from softirq context and interruptsshould be enabled.
Return values (usually ignored):NET_RX_SUCCESS: no congestionNET_RX_DROP: packet was dropped
Parameters
structsk_buff*skbbuffer to process
Description
netif_receive_skb()is the main receive data processing function.It always succeeds. The buffer may be dropped during processingfor congestion control or by the protocol layers.This function may only be called from softirq context and interruptsshould be enabled.
Return values (usually ignored):NET_RX_SUCCESS: no congestionNET_RX_DROP: packet was dropped
- voidnetif_receive_skb_list(structlist_head*head)¶
process many receive buffers from network
Parameters
structlist_head*headlist of skbs to process.
Description
Since return value of
netif_receive_skb()is normally ignored, andwouldn’t be meaningful for a list, this function returns void.This function may only be called from softirq context and interruptsshould be enabled.
- void__napi_schedule(structnapi_struct*n)¶
schedule for receive
Parameters
structnapi_struct*nentry to schedule
Description
The entry’s receive function will be scheduled to run.Consider using__napi_schedule_irqoff() if hard irqs are masked.
- boolnapi_schedule_prep(structnapi_struct*n)¶
check if napi can be scheduled
Parameters
structnapi_struct*nnapi context
Description
Test if NAPI routine is already running, and if not mark it as running. This is used as a condition variable to ensure only one NAPI poll instance runs. We also make sure there is no pending NAPI disable.
- void__napi_schedule_irqoff(structnapi_struct*n)¶
schedule for receive
Parameters
structnapi_struct*nentry to schedule
Description
Variant of__napi_schedule() assuming hard irqs are masked.
On PREEMPT_RT enabled kernels this maps to__napi_schedule()because the interrupt disabled assumption might not be truedue to force-threaded interrupts and spinlock substitution.
- voidnetif_threaded_enable(structnet_device*dev)¶
enable threaded NAPIs
Parameters
structnet_device*devnet_device instance
Description
Enable threaded mode for the NAPI instances of the device. This may be usefulfor devices where multiple NAPI instances get scheduled by a singleinterrupt. Threaded NAPI allows moving the NAPI processing to cores otherthan the core where IRQ is mapped.
This function should be called beforedev is registered.
- voidnetif_queue_set_napi(structnet_device*dev,unsignedintqueue_index,enumnetdev_queue_typetype,structnapi_struct*napi)¶
Associate queue with the napi
Parameters
structnet_device*devdevice to which NAPI and queue belong
unsignedintqueue_indexIndex of queue
enumnetdev_queue_typetypequeue type as RX or TX
structnapi_struct*napiNAPI context, pass NULL to clear previously set NAPI
Description
Set queue with its corresponding napi context. This should be done afterregistering the NAPI handler for the queue-vector and the queues have beenmapped to the corresponding interrupt vector.
- voidnapi_disable(structnapi_struct*n)¶
prevent NAPI from scheduling
Parameters
structnapi_struct*nNAPI context
Description
Stop NAPI from being scheduled on this context.Waits till any outstanding processing completes.Takesnetdev_lock() for associated net_device.
- voidnapi_enable(structnapi_struct*n)¶
enable NAPI scheduling
Parameters
structnapi_struct*nNAPI context
Description
Enable scheduling of a NAPI instance.Must be paired withnapi_disable().Takesnetdev_lock() for associated net_device.
- boolnetdev_has_upper_dev(structnet_device*dev,structnet_device*upper_dev)¶
Check if device is linked to an upper device
Parameters
structnet_device*devdevice
structnet_device*upper_devupper device to check
Description
Find out if a device is linked to specified upper device and return truein case it is. Note that this checks only immediate upper device,not through a complete stack of devices. The caller must hold the RTNL lock.
- boolnetdev_has_upper_dev_all_rcu(structnet_device*dev,structnet_device*upper_dev)¶
Check if device is linked to an upper device
Parameters
structnet_device*devdevice
structnet_device*upper_devupper device to check
Description
Find out if a device is linked to specified upper device and return truein case it is. Note that this checks the entire upper device chain.The caller must hold rcu lock.
- boolnetdev_has_any_upper_dev(structnet_device*dev)¶
Check if device is linked to some device
Parameters
structnet_device*devdevice
Description
Find out if a device is linked to an upper device and return true in caseit is. The caller must hold the RTNL lock.
- structnet_device*netdev_master_upper_dev_get(structnet_device*dev)¶
Get master upper device
Parameters
structnet_device*devdevice
Description
Find a master upper device and return pointer to it or NULL in caseit’s not there. The caller must hold the RTNL lock.
- structnet_device*netdev_upper_get_next_dev_rcu(structnet_device*dev,structlist_head**iter)¶
Get the next dev from upper list
Parameters
structnet_device*devdevice
structlist_head**iterlist_head ** of the current position
Description
Gets the next device from the dev’s upper list, starting from iterposition. The caller must hold RCU read lock.
- void*netdev_lower_get_next_private(structnet_device*dev,structlist_head**iter)¶
Get the next ->private from the lower neighbour list
Parameters
structnet_device*devdevice
structlist_head**iterlist_head ** of the current position
Description
Gets the next netdev_adjacent->private from the dev’s lower neighbour list, starting from iter position. The caller must either hold the RTNL lock or use its own locking that guarantees that the neighbour lower list will remain unchanged.
- void*netdev_lower_get_next_private_rcu(structnet_device*dev,structlist_head**iter)¶
Get the next ->private from the lower neighbour list, RCU variant
Parameters
structnet_device*devdevice
structlist_head**iterlist_head ** of the current position
Description
Gets the next netdev_adjacent->private from the dev’s lower neighbourlist, starting from iter position. The caller must hold RCU read lock.
- void*netdev_lower_get_next(structnet_device*dev,structlist_head**iter)¶
Get the next device from the lower neighbour list
Parameters
structnet_device*devdevice
structlist_head**iterlist_head ** of the current position
Description
Gets the next netdev_adjacent from the dev’s lower neighbourlist, starting from iter position. The caller must hold RTNL lock orits own locking that guarantees that the neighbour lowerlist will remain unchanged.
- void*netdev_lower_get_first_private_rcu(structnet_device*dev)¶
Get the first ->private from the lower neighbour list, RCU variant
Parameters
structnet_device*devdevice
Description
Gets the first netdev_adjacent->private from the dev’s lower neighbourlist. The caller must hold RCU read lock.
- structnet_device*netdev_master_upper_dev_get_rcu(structnet_device*dev)¶
Get master upper device
Parameters
structnet_device*devdevice
Description
Find a master upper device and return pointer to it or NULL in caseit’s not there. The caller must hold the RCU read lock.
- intnetdev_upper_dev_link(structnet_device*dev,structnet_device*upper_dev,structnetlink_ext_ack*extack)¶
Add a link to the upper device
Parameters
structnet_device*devdevice
structnet_device*upper_devnew upper device
structnetlink_ext_ack*extacknetlink extended ack
Description
Adds a link to device which is upper to this one. The caller must holdthe RTNL lock. On a failure a negative errno code is returned.On success the reference counts are adjusted and the functionreturns zero.
- intnetdev_master_upper_dev_link(structnet_device*dev,structnet_device*upper_dev,void*upper_priv,void*upper_info,structnetlink_ext_ack*extack)¶
Add a master link to the upper device
Parameters
structnet_device*devdevice
structnet_device*upper_devnew upper device
void*upper_privupper device private
void*upper_infoupper info to be passed down via notifier
structnetlink_ext_ack*extacknetlink extended ack
Description
Adds a link to device which is upper to this one. In this case, onlyone master upper device can be linked, although other non-master devicesmight be linked as well. The caller must hold the RTNL lock.On a failure a negative errno code is returned. On success the referencecounts are adjusted and the function returns zero.
- voidnetdev_upper_dev_unlink(structnet_device*dev,structnet_device*upper_dev)¶
Removes a link to upper device
Parameters
structnet_device*devdevice
structnet_device*upper_devupper device to unlink
Description
Removes a link to device which is upper to this one. The caller must holdthe RTNL lock.
- voidnetdev_bonding_info_change(structnet_device*dev,structnetdev_bonding_info*bonding_info)¶
Dispatch event about slave change
Parameters
structnet_device*devdevice
structnetdev_bonding_info*bonding_infoinfo to dispatch
Description
Send NETDEV_BONDING_INFO to netdev notifiers with info.The caller must hold the RTNL lock.
- structnet_device*netdev_get_xmit_slave(structnet_device*dev,structsk_buff*skb,boolall_slaves)¶
Get the xmit slave of master device
Parameters
structnet_device*devdevice
structsk_buff*skbThe packet
boolall_slavesassume all the slaves are active
Description
The reference counters are not incremented so the caller must becareful with locks. The caller must hold RCU lock.NULL is returned if no slave is found.
- structnet_device*netdev_sk_get_lowest_dev(structnet_device*dev,structsock*sk)¶
Get the lowest device in chain given device and socket
Parameters
structnet_device*devdevice
structsock*skthe socket
Description
NULL is returned if no lower device is found.
- voidnetdev_lower_state_changed(structnet_device*lower_dev,void*lower_state_info)¶
Dispatch event about lower device state change
Parameters
structnet_device*lower_devdevice
void*lower_state_infostate to dispatch
Description
Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.The caller must hold the RTNL lock.
- unsignedintnetif_get_flags(conststructnet_device*dev)¶
get flags reported to userspace
Parameters
conststructnet_device*devdevice
Description
Get the combination of flag bits exported through APIs to userspace.
- intnetif_pre_changeaddr_notify(structnet_device*dev,constchar*addr,structnetlink_ext_ack*extack)¶
Call NETDEV_PRE_CHANGEADDR.
Parameters
structnet_device*devdevice
constchar*addrnew address
structnetlink_ext_ack*extacknetlink extended ack
Return
0 on success, -errno on failure.
- intnetif_get_port_parent_id(structnet_device*dev,structnetdev_phys_item_id*ppid,boolrecurse)¶
Get the device’s port parent identifier
Parameters
structnet_device*devnetwork device
structnetdev_phys_item_id*ppidpointer to a storage for the port’s parent identifier
boolrecurseallow/disallow recursion to lower devices
Description
Get the device’s port parent identifier.
Return
0 on success, -errno on failure.
- boolnetdev_port_same_parent_id(structnet_device*a,structnet_device*b)¶
Indicate if two network devices have the same port parent identifier
Parameters
structnet_device*afirst network device
structnet_device*bsecond network device
- voidnetdev_update_features(structnet_device*dev)¶
recalculate device features
Parameters
structnet_device*devthe device to check
Description
Recalculate dev->features set and send notifications if ithas changed. Should be called after driver or hardware dependentconditions might have changed that influence the features.
- voidnetdev_change_features(structnet_device*dev)¶
recalculate device features
Parameters
structnet_device*devthe device to check
Description
Recalculate dev->features set and send notifications evenif they have not changed. Should be called instead of
netdev_update_features()if also dev->vlan_features mighthave changed to allow the changes to be propagated to stackedVLAN devices.
- voidnetif_stacked_transfer_operstate(conststructnet_device*rootdev,structnet_device*dev)¶
transfer operstate
Parameters
conststructnet_device*rootdevthe root or lower level device to transfer state from
structnet_device*devthe device to transfer operstate to
Description
Transfer operational state from root to device. This is normallycalled when a stacking relationship exists between the rootdevice and the device(a leaf device).
- intregister_netdevice(structnet_device*dev)¶
register a network device
Parameters
structnet_device*devdevice to register
Description
Take a prepared network device structure and make it externally accessible. A NETDEV_REGISTER message is sent to the netdev notifier chain. Callers must hold the rtnl lock - you may want register_netdev() instead of this.
- intregister_netdev(structnet_device*dev)¶
register a network device
Parameters
structnet_device*devdevice to register
Description
Take a completed network device structure and add it to the kernelinterfaces. A
NETDEV_REGISTERmessage is sent to the netdev notifierchain. 0 is returned on success. A negative errno code is returnedon a failure to set up the device, or if the name is a duplicate.This is a wrapper around register_netdevice that takes the rtnl semaphoreand expands the device name if you passed a format string toalloc_netdev.
- structrtnl_link_stats64*dev_get_stats(structnet_device*dev,structrtnl_link_stats64*storage)¶
get network device statistics
Parameters
structnet_device*devdevice to get statistics from
structrtnl_link_stats64*storageplace to store stats
Description
Get network statistics from device. Returnstorage.The device driver may provide its own method by settingdev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;otherwise the internal statistics structure is used.
- voiddev_fetch_sw_netstats(structrtnl_link_stats64*s,conststructpcpu_sw_netstats__percpu*netstats)¶
get per-cpu network device statistics
Parameters
structrtnl_link_stats64*splace to store stats
conststructpcpu_sw_netstats__percpu*netstatsper-cpu network stats to read from
Description
Read per-cpu network statistics and populate the related fields ins.
- voiddev_get_tstats64(structnet_device*dev,structrtnl_link_stats64*s)¶
ndo_get_stats64 implementation
Parameters
structnet_device*devdevice to get statistics from
structrtnl_link_stats64*splace to store stats
Description
Populates from dev->stats and dev->tstats. Can be used as
ndo_get_stats64()callback.
- voidnetdev_sw_irq_coalesce_default_on(structnet_device*dev)¶
enable SW IRQ coalescing by default
Parameters
structnet_device*devnetdev to enable the IRQ coalescing on
Description
Sets a conservative default for SW IRQ coalescing. Users can usesysfs attributes to override the default values.
- structnet_device*alloc_netdev_mqs(intsizeof_priv,constchar*name,unsignedcharname_assign_type,void(*setup)(structnet_device*),unsignedinttxqs,unsignedintrxqs)¶
allocate network device
Parameters
intsizeof_privsize of private data to allocate space for
constchar*namedevice name format string
unsignedcharname_assign_typeorigin of device name
void(*setup)(structnet_device*)callback to initialize device
unsignedinttxqsthe number of TX subqueues to allocate
unsignedintrxqsthe number of RX subqueues to allocate
Description
Allocates astructnet_device with private data area for driver useand performs basic initialization. Also allocates subqueue structsfor each queue on the device.
- voidfree_netdev(structnet_device*dev)¶
free network device
Parameters
structnet_device*devdevice
Description
This function does the last stage of destroying an allocated deviceinterface. The reference to the device object is released. If thisis the last reference then it will be freed.Must be called in processcontext.
- structnet_device*alloc_netdev_dummy(intsizeof_priv)¶
Allocate and initialize a dummy net device.
Parameters
intsizeof_privsize of private data to allocate space for
Return
the allocated net_device on success, NULL otherwise
- voidsynchronize_net(void)¶
Synchronize with packet receive processing
Parameters
voidno arguments
Description
Wait for packets currently being received to be done.Does not block later packets from starting.
- voidunregister_netdevice_queue(structnet_device*dev,structlist_head*head)¶
remove device from the kernel
Parameters
structnet_device*devdevice
structlist_head*headlist
Description
This function shuts down a device interface and removes it from the kernel tables. If head is not NULL, device is queued to be unregistered later.
Callers must hold the rtnl semaphore. You may want
unregister_netdev()instead of this.
- voidunregister_netdevice_many(structlist_head*head)¶
unregister many devices
Parameters
structlist_head*headlist of devices
Note
- As most callers use a stack allocated list_head,
we force a
list_del()to make sure stack won’t be corrupted later.
- voidunregister_netdev(structnet_device*dev)¶
remove device from the kernel
Parameters
structnet_device*devdevice
Description
This function shuts down a device interface and removes itfrom the kernel tables.
This is just a wrapper for unregister_netdevice that takesthe rtnl semaphore. In general you want to use this and notunregister_netdevice.
- netdev_features_tnetdev_increment_features(netdev_features_tall,netdev_features_tone,netdev_features_tmask)¶
increment feature set by one
Parameters
netdev_features_tallcurrent feature set
netdev_features_tonenew feature set
netdev_features_tmaskmask feature set
Description
Computes a new feature set after adding a device with feature setone to the master device with current feature setall. Will notenable anything that is off inmask. Returns the new feature set.
- voidnetdev_compute_master_upper_features(structnet_device*dev,boolupdate_header)¶
compute feature from lowers
Parameters
structnet_device*devthe upper device
boolupdate_headerwhether to update upper device’s header_len/headroom/tailroom
Description
Recompute the upper device’s feature based on all lower devices.
- inteth_header(structsk_buff*skb,structnet_device*dev,unsignedshorttype,constvoid*daddr,constvoid*saddr,unsignedintlen)¶
create the Ethernet header
Parameters
structsk_buff*skbbuffer to alter
structnet_device*devsource device
unsignedshorttypeEthernet type field
constvoid*daddrdestination address (NULL leave destination address)
constvoid*saddrsource address (NULL use device source address)
unsignedintlenpacket length (<= skb->len)
Description
Set the protocol type. For a packet of type ETH_P_802_3/2 we put the lengthin here instead.
- u32eth_get_headlen(conststructnet_device*dev,constvoid*data,u32len)¶
determine the length of header for an ethernet frame
Parameters
conststructnet_device*devpointer to network device
constvoid*datapointer to start of frame
u32lentotal length of frame
Description
Make a best effort attempt to pull the length for all of the headers fora given frame in a linear buffer.
- __be16eth_type_trans(structsk_buff*skb,structnet_device*dev)¶
determine the packet’s protocol ID.
Parameters
structsk_buff*skbreceived socket data
structnet_device*devreceiving network device
Description
The rule here is that weassume 802.3 if the type field is short enough to be a length.This is normal practice and works for any ‘now in use’ protocol.
Parameters
conststructsk_buff*skbpacket to extract header from
unsignedchar*haddrdestination buffer
- inteth_header_cache(conststructneighbour*neigh,structhh_cache*hh,__be16type)¶
fill cache entry from neighbour
Parameters
conststructneighbour*neighsource neighbour
structhh_cache*hhdestination cache entry
__be16typeEthernet type field
Description
Create an Ethernet header template from the neighbour.
- voideth_header_cache_update(structhh_cache*hh,conststructnet_device*dev,constunsignedchar*haddr)¶
update cache entry
Parameters
structhh_cache*hhdestination cache entry
conststructnet_device*devnetwork device
constunsignedchar*haddrnew hardware address
Description
Called by Address Resolution module to notify changes in address.
Parameters
conststructsk_buff*skbpacket to extract protocol from
- inteth_prepare_mac_addr_change(structnet_device*dev,void*p)¶
prepare for mac change
Parameters
structnet_device*devnetwork device
void*psocket address
- voideth_commit_mac_addr_change(structnet_device*dev,void*p)¶
commit mac change
Parameters
structnet_device*devnetwork device
void*psocket address
- inteth_mac_addr(structnet_device*dev,void*p)¶
set new Ethernet hardware address
Parameters
structnet_device*devnetwork device
void*psocket address
Description
Change hardware address of device.
This doesn’t change hardware matching, so needs to be overriddenfor most real devices.
- voidether_setup(structnet_device*dev)¶
setup Ethernet network device
Parameters
structnet_device*devnetwork device
Description
Fill in the fields of the device structure with Ethernet-generic values.
- structnet_device*alloc_etherdev_mqs(intsizeof_priv,unsignedinttxqs,unsignedintrxqs)¶
Allocates and sets up an Ethernet device
Parameters
intsizeof_privSize of additional driver-private structure to be allocatedfor this Ethernet device
unsignedinttxqsThe number of TX queues this device has.
unsignedintrxqsThe number of RX queues this device has.
Description
Fill in the fields of the device structure with Ethernet-genericvalues. Basically does everything except registering the device.
Constructs a new net device, complete with a private data area ofsize (sizeof_priv). A 32-byte (not bit) alignment is enforced forthis private data area.
- intplatform_get_ethdev_address(structdevice*dev,structnet_device*netdev)¶
Set netdev’s MAC address from a given device
Parameters
structdevice*devPointer to the device
structnet_device*netdevPointer to netdev to write the address to
Description
Wrapper aroundeth_platform_get_mac_address() which writes the addressdirectly to netdev->dev_addr.
- intfwnode_get_mac_address(structfwnode_handle*fwnode,char*addr)¶
Get the MAC from the firmware node
Parameters
structfwnode_handle*fwnodePointer to the firmware node
char*addrAddress of buffer to store the MAC in
Description
Search the firmware node for the best MAC address to use. ‘mac-address’ is checked first, because that is supposed to contain the “most recent” MAC address. If that isn’t set, then ‘local-mac-address’ is checked next, because that is the default address. If that isn’t set, then the obsolete ‘address’ is checked, just in case we’re using an old device tree.
Note that the ‘address’ property is supposed to contain a virtual address ofthe register set, but some DTS files have redefined that property to be theMAC address.
All-zero MAC addresses are rejected, because those could be properties thatexist in the firmware tables, but were not updated by the firmware. Forexample, the DTS could define ‘mac-address’ and ‘local-mac-address’, withzero MAC addresses. Some older U-Boots only initialized ‘local-mac-address’.In this case, the real MAC is in ‘local-mac-address’, and ‘mac-address’exists but is all zeros.
Parameters
structdevice*devPointer to the device
char*addrAddress of buffer to store the MAC in
- intdevice_get_ethdev_address(structdevice*dev,structnet_device*netdev)¶
Set netdev’s MAC address from a given device
Parameters
structdevice*devPointer to the device
structnet_device*netdevPointer to netdev to write the address to
Description
Wrapper arounddevice_get_mac_address() which writes the addressdirectly to netdev->dev_addr.
- voidnetif_carrier_on(structnet_device*dev)¶
set carrier
Parameters
structnet_device*devnetwork device
Description
Device has detected acquisition of carrier.
- voidnetif_carrier_off(structnet_device*dev)¶
clear carrier
Parameters
structnet_device*devnetwork device
Description
Device has detected loss of carrier.
- voidnetif_carrier_event(structnet_device*dev)¶
report carrier state event
Parameters
structnet_device*devnetwork device
Description
Device has detected a carrier event but the carrier state wasn’t changed.Use in drivers when querying carrier state asynchronously, to avoid missingevents (link flaps) if link recovers before it’s queried.
- boolis_link_local_ether_addr(constu8*addr)¶
Determine if given Ethernet address is link-local
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if address is link local reserved addr (01:80:c2:00:00:0X) perIEEE 802.1Q 8.6.3 Frame filtering.
Description
Please note: addr must be aligned to u16.
- boolis_zero_ether_addr(constu8*addr)¶
Determine if given Ethernet address is all zeros.
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if the address is all zeroes.
Description
Please note: addr must be aligned to u16.
- boolis_multicast_ether_addr(constu8*addr)¶
Determine if the Ethernet address is a multicast.
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if the address is a multicast address.By definition the broadcast address is also a multicast address.
- boolis_local_ether_addr(constu8*addr)¶
Determine if the Ethernet address is locally-assigned one (IEEE 802).
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if the address is a local address.
- boolis_broadcast_ether_addr(constu8*addr)¶
Determine if the Ethernet address is broadcast
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if the address is the broadcast address.
Description
Please note: addr must be aligned to u16.
- boolis_unicast_ether_addr(constu8*addr)¶
Determine if the Ethernet address is unicast
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
true if the address is a unicast address.
- boolis_valid_ether_addr(constu8*addr)¶
Determine if the given Ethernet address is valid
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Description
Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is nota multicast address, and is not FF:FF:FF:FF:FF:FF.
Please note: addr must be aligned to u16.
Return
true if the address is valid.
- booleth_proto_is_802_3(__be16proto)¶
Determine if a given Ethertype/length is a protocol
Parameters
__be16protoEthertype/length value to be tested
Description
Check that the value from the Ethertype/length field is a valid Ethertype.
Return
true if the value is an 802.3 supported Ethertype.
- voideth_random_addr(u8*addr)¶
Generate software assigned random Ethernet address
Parameters
u8*addrPointer to a six-byte array containing the Ethernet address
Description
Generate a random Ethernet address (MAC) that is not multicastand has the local assigned bit set.
- voideth_broadcast_addr(u8*addr)¶
Assign broadcast address
Parameters
u8*addrPointer to a six-byte array containing the Ethernet address
Description
Assign the broadcast address to the given address array.
- voideth_zero_addr(u8*addr)¶
Assign zero address
Parameters
u8*addrPointer to a six-byte array containing the Ethernet address
Description
Assign the zero address to the given address array.
- voideth_hw_addr_random(structnet_device*dev)¶
Generate software assigned random Ethernet and set device flag
Parameters
structnet_device*devpointer to net_device structure
Description
Generate a random Ethernet address (MAC) to be used by a net deviceand set addr_assign_type so the state can be read by sysfs and beused by userspace.
- u32eth_hw_addr_crc(structnetdev_hw_addr*ha)¶
Calculate CRC from netdev_hw_addr
Parameters
structnetdev_hw_addr*hapointer to hardware address
Description
Calculate CRC from a hardware address as basis for filter hashes.
- voidether_addr_copy(u8*dst,constu8*src)¶
Copy an Ethernet address
Parameters
u8*dstPointer to a six-byte array Ethernet address destination
constu8*srcPointer to a six-byte array Ethernet address source
Description
Please note: dst & src must both be aligned to u16.
- voideth_hw_addr_set(structnet_device*dev,constu8*addr)¶
Assign Ethernet address to a net_device
Parameters
structnet_device*devpointer to net_device structure
constu8*addraddress to assign
Description
Assign given address to the net_device, addr_assign_type is not changed.
- voideth_hw_addr_inherit(structnet_device*dst,structnet_device*src)¶
Copy dev_addr from another net_device
Parameters
structnet_device*dstpointer to net_device to copy dev_addr to
structnet_device*srcpointer to net_device to copy dev_addr from
Description
Copy the Ethernet address from one net_device to another along withthe address attributes (addr_assign_type).
- boolether_addr_equal(constu8*addr1,constu8*addr2)¶
Compare two Ethernet addresses
Parameters
constu8*addr1Pointer to a six-byte array containing the Ethernet address
constu8*addr2Pointer to other six-byte array containing the Ethernet address
Description
Compare two Ethernet addresses, returns true if equal
Please note: addr1 & addr2 must both be aligned to u16.
- boolether_addr_equal_64bits(constu8*addr1,constu8*addr2)¶
Compare two Ethernet addresses
Parameters
constu8*addr1Pointer to an array of 8 bytes
constu8*addr2Pointer to another array of 8 bytes
Description
Compare two Ethernet addresses, returns true if equal, false otherwise.
The function doesn’t need any conditional branches and possibly usesword memory accesses on CPU allowing cheap unaligned memory reads.arrays = { byte1, byte2, byte3, byte4, byte5, byte6, pad1, pad2 }
Please note that the alignment of addr1 & addr2 is only guaranteed to be 16 bits.
- boolether_addr_equal_unaligned(constu8*addr1,constu8*addr2)¶
Compare two not u16 aligned Ethernet addresses
Parameters
constu8*addr1Pointer to a six-byte array containing the Ethernet address
constu8*addr2Pointer to other six-byte array containing the Ethernet address
Description
Compare two Ethernet addresses, returns true if equal
Please note: Use only when any Ethernet address may not be u16 aligned.
- boolether_addr_equal_masked(constu8*addr1,constu8*addr2,constu8*mask)¶
Compare two Ethernet addresses with a mask
Parameters
constu8*addr1Pointer to a six-byte array containing the 1st Ethernet address
constu8*addr2Pointer to a six-byte array containing the 2nd Ethernet address
constu8*maskPointer to a six-byte array containing the Ethernet address bitmask
Description
Compare two Ethernet addresses with a mask, returns true if for every bitset in the bitmask the equivalent bits in the ethernet addresses are equal.Using a mask with all bits set is a slower ether_addr_equal.
- u64ether_addr_to_u64(constu8*addr)¶
Convert an Ethernet address into a u64 value.
Parameters
constu8*addrPointer to a six-byte array containing the Ethernet address
Return
a u64 value of the address
- voidu64_to_ether_addr(u64u,u8*addr)¶
Convert a u64 to an Ethernet address.
Parameters
u64uu64 to convert to an Ethernet MAC address
u8*addrPointer to a six-byte array to contain the Ethernet address
- voideth_addr_dec(u8*addr)¶
Decrement the given MAC address
Parameters
u8*addrPointer to a six-byte array containing Ethernet address to decrement
- voideth_addr_inc(u8*addr)¶
Increment the given MAC address.
Parameters
u8*addrPointer to a six-byte array containing Ethernet address to increment.
- voideth_addr_add(u8*addr,longoffset)¶
Add (or subtract) an offset to/from the given MAC address.
Parameters
u8*addrPointer to a six-byte array containing Ethernet address to increment.
longoffsetOffset to add.
- boolis_etherdev_addr(conststructnet_device*dev,constu8addr[6+2])¶
Tell if given Ethernet address belongs to the device.
Parameters
conststructnet_device*devPointer to a device structure
constu8addr[6+2]Pointer to a six-byte array containing the Ethernet address
Description
Compare passed address with all addresses of the device. Return true if the address is one of the device addresses.
Note that this function callsether_addr_equal_64bits() so take care ofthe right padding.
- unsignedlongcompare_ether_header(constvoid*a,constvoid*b)¶
Compare two Ethernet headers
Parameters
constvoid*aPointer to Ethernet header
constvoid*bPointer to Ethernet header
Description
Compare two Ethernet headers, returns 0 if equal.This assumes that the network header (i.e., IP header) is 4-bytealigned OR the platform can handle unaligned access. This is thecase for all packets coming into netif_receive_skb or similarentry points.
- voideth_hw_addr_gen(structnet_device*dev,constu8*base_addr,unsignedintid)¶
Generate and assign Ethernet address to a port
Parameters
structnet_device*devpointer to port’s net_device structure
constu8*base_addrbase Ethernet address
unsignedintidoffset to add to the base address
Description
Generate a MAC address using a base address and an offset and assign itto a net_device. Commonly used by switch drivers which need to computeaddresses for all their ports. addr_assign_type is not changed.
- voideth_skb_pkt_type(structsk_buff*skb,conststructnet_device*dev)¶
Assign packet type if destination address does not match
Parameters
structsk_buff*skbAssigned a packet type if address does not matchdev address
conststructnet_device*devNetwork device used to compare packet address against
Description
If the destination MAC address of the packet does not match the networkdevice address, assign an appropriate packet type.
- inteth_skb_pad(structsk_buff*skb)¶
Pad buffer to minimum number of octets for Ethernet frame
Parameters
structsk_buff*skbBuffer to pad
Description
An Ethernet frame should have a minimum size of 60 bytes. This functiontakes short frames and pads them with zeros up to the 60 byte limit.
- structgro_node¶
structure to support Generic Receive Offload
Definition:
struct gro_node { unsigned long bitmask; struct gro_list hash[GRO_HASH_BUCKETS]; struct list_head rx_list; u32 rx_count; u32 cached_napi_id;};Members
bitmaskbitmask to indicate used buckets inhash
hashhashtable of pending aggregated skbs, separated by flows
rx_listlist of pending GRO_NORMAL skbs
rx_countcached current length of rx_list
cached_napi_idnapi_struct::napi_id cached for hotpath, 0 for standalone
- boolnapi_is_scheduled(structnapi_struct*n)¶
test if NAPI is scheduled
Parameters
structnapi_struct*nNAPI context
Description
This check is “best-effort”. With no locking implemented, a NAPI can be scheduled or terminate right after this check and produce imprecise results.
NAPI_STATE_SCHED is an internal state, napi_is_scheduledshould not be used normally and napi_schedule should beused instead.
Use only if the driver really needs to check if a NAPIis scheduled for example in the context of delayed timerthat can be skipped if a NAPI is already scheduled.
Return
True if NAPI is scheduled, False otherwise.
- boolnapi_schedule(structnapi_struct*n)¶
schedule NAPI poll
Parameters
structnapi_struct*nNAPI context
Description
Schedule NAPI poll routine to be called if it is not alreadyrunning.
Return
true if we schedule a NAPI or false if not.Refer tonapi_schedule_prep() for additional reason on whya NAPI might not be scheduled.
- voidnapi_schedule_irqoff(structnapi_struct*n)¶
schedule NAPI poll
Parameters
structnapi_struct*nNAPI context
Description
Variant ofnapi_schedule(), assuming hard irqs are masked.
- boolnapi_complete_done(structnapi_struct*n,intwork_done)¶
NAPI processing complete
Parameters
structnapi_struct*nNAPI context
intwork_donenumber of packets processed
Description
Mark NAPI processing as complete. Should only be called if poll budget has not been completely consumed. Prefer over napi_complete().
Return
false if device should avoid rearming interrupts.
- voidnapi_synchronize(conststructnapi_struct*n)¶
wait until NAPI is not running
Parameters
conststructnapi_struct*nNAPI context
Description
Wait until NAPI is done being scheduled on this context.Waits till any outstanding processing completes butdoes not disable future activations.
- boolnapi_if_scheduled_mark_missed(structnapi_struct*n)¶
if napi is running, set the NAPIF_STATE_MISSED
Parameters
structnapi_struct*nNAPI context
Description
If napi is running, set the NAPIF_STATE_MISSED, and return true ifNAPI is scheduled.
- enumnetdev_priv_flags¶
structnet_devicepriv_flags
Constants
IFF_802_1Q_VLAN802.1Q VLAN device
IFF_EBRIDGEEthernet bridging device
IFF_BONDINGbonding master or slave
IFF_ISATAPISATAP interface (RFC4214)
IFF_WAN_HDLCWAN HDLC device
IFF_XMIT_DST_RELEASEdev_hard_start_xmit() is allowed to release skb->dst
IFF_DONT_BRIDGEdisallow bridging this ether dev
IFF_DISABLE_NETPOLLdisable netpoll at run-time
IFF_MACVLAN_PORTdevice used as macvlan port
IFF_BRIDGE_PORTdevice used as bridge port
IFF_OVS_DATAPATHdevice used as Open vSwitch datapath port
IFF_TX_SKB_SHARINGThe interface supports sharing skbs on transmit
IFF_UNICAST_FLTSupports unicast filtering
IFF_TEAM_PORTdevice used as team port
IFF_SUPP_NOFCSdevice supports sending custom FCS
IFF_LIVE_ADDR_CHANGEdevice supports hardware addresschange when it’s running
IFF_MACVLANMacvlan device
IFF_XMIT_DST_RELEASE_PERMIFF_XMIT_DST_RELEASE not taking into accountunderlying stacked devices
IFF_L3MDEV_MASTERdevice is an L3 master device
IFF_NO_QUEUEdevice can run without qdisc attached
IFF_OPENVSWITCHdevice is an Open vSwitch master
IFF_L3MDEV_SLAVEdevice is enslaved to an L3 master device
IFF_TEAMdevice is a team device
IFF_RXFH_CONFIGUREDdevice has had Rx Flow indirection table configured
IFF_PHONY_HEADROOMthe headroom value is controlled by an externalentity (i.e. the master device for bridged veth)
IFF_MACSECdevice is a MACsec device
IFF_NO_RX_HANDLERdevice doesn’t support the rx_handler hook
IFF_FAILOVERdevice is a failover master device
IFF_FAILOVER_SLAVEdevice is lower dev of a failover master device
IFF_L3MDEV_RX_HANDLERonly invoke the rx handler of L3 master device
IFF_NO_ADDRCONFprevent ipv6 addrconf
IFF_TX_SKB_NO_LINEARdevice/driver is capable of xmitting frames withskb_headlen(skb) == 0 (data starts from frag0)
Description
These are the struct net_device priv_flags; they are only set internally by drivers and used in the kernel. These flags are invisible to userspace; this means that the order of these flags can change during any kernel release.
You should add bitfield booleans after either net_device::priv_flags(hotpath) or ::threaded (slowpath) instead of extending these flags.
- structnet_device¶
The DEVICE structure.
Definition:
struct net_device { unsigned long priv_flags:32; unsigned long lltx:1; unsigned long netmem_tx:1; const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; netdev_features_t gso_partial_features; unsigned int real_num_tx_queues; unsigned int gso_max_size; unsigned int gso_ipv4_max_size; u16 gso_max_segs; s16 num_tc; unsigned int mtu; unsigned short needed_headroom; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];#ifdef CONFIG_XPS; struct xps_dev_maps *xps_maps[XPS_MAPS_MAX];#endif;#ifdef CONFIG_NETFILTER_EGRESS; struct nf_hook_entries *nf_hooks_egress;#endif;#ifdef CONFIG_NET_XGRESS; struct bpf_mprog_entry *tcx_egress;#endif; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; }; unsigned long state; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; struct inet6_dev *ip6_ptr; struct bpf_prog *xdp_prog; struct list_head ptype_specific; int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t *rx_handler; void *rx_handler_data; possible_net_t nd_net;#ifdef CONFIG_NETPOLL; struct netpoll_info *npinfo;#endif;#ifdef CONFIG_NET_XGRESS; struct bpf_mprog_entry *tcx_ingress;#endif; char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias *ifalias; unsigned long mem_end; unsigned long mem_start; unsigned long base_addr; struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; struct { struct list_head upper; struct list_head lower; } adj_list; xdp_features_t xdp_features; const struct xdp_metadata_ops *xdp_metadata_ops; const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; unsigned short gflags; unsigned short needed_tailroom; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; 
netdev_features_t hw_enc_features; netdev_features_t mpls_features; unsigned int min_mtu; unsigned int max_mtu; unsigned short type; unsigned char min_header_len; unsigned char name_assign_type; int group; struct net_device_stats stats; struct net_device_core_stats __percpu *core_stats; atomic_t carrier_up_count; atomic_t carrier_down_count;#ifdef CONFIG_WIRELESS_EXT; const struct iw_handler_def *wireless_handlers;#endif; const struct ethtool_ops *ethtool_ops;#ifdef CONFIG_NET_L3_MASTER_DEV; const struct l3mdev_ops *l3mdev_ops;#endif;#if IS_ENABLED(CONFIG_IPV6); const struct ndisc_ops *ndisc_ops;#endif;#ifdef CONFIG_XFRM_OFFLOAD; const struct xfrmdev_ops *xfrmdev_ops;#endif;#if IS_ENABLED(CONFIG_TLS_DEVICE); const struct tlsdev_ops *tlsdev_ops;#endif; unsigned int operstate; unsigned char link_mode; unsigned char if_port; unsigned char dma; unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; u8 threaded; unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; int irq; u32 priv_len; spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs;#ifdef CONFIG_SYSFS; struct kset *queues_kset;#endif;#ifdef CONFIG_LOCKDEP; struct list_head unlink_list;#endif; unsigned int promiscuity; unsigned int allmulti; bool uc_promisc;#ifdef CONFIG_LOCKDEP; unsigned char nested_level;#endif; struct in_device *ip_ptr; struct hlist_head fib_nh_head;#if IS_ENABLED(CONFIG_VLAN_8021Q); struct vlan_info *vlan_info;#endif;#if IS_ENABLED(CONFIG_NET_DSA); struct dsa_port *dsa_ptr;#endif;#if IS_ENABLED(CONFIG_TIPC); struct tipc_bearer *tipc_ptr;#endif;#if IS_ENABLED(CONFIG_ATALK); void *atalk_ptr;#endif;#if IS_ENABLED(CONFIG_AX25); struct ax25_dev *ax25_ptr;#endif;#if IS_ENABLED(CONFIG_CFG80211); struct wireless_dev *ieee80211_ptr;#endif;#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN); struct 
wpan_dev *ieee802154_ptr;#endif;#if IS_ENABLED(CONFIG_MPLS_ROUTING); struct mpls_dev *mpls_ptr;#endif;#if IS_ENABLED(CONFIG_MCTP); struct mctp_dev *mctp_ptr;#endif;#if IS_ENABLED(CONFIG_INET_PSP); struct psp_dev *psp_dev;#endif; const unsigned char *dev_addr; unsigned int num_rx_queues;#define GRO_LEGACY_MAX_SIZE 65536u;#define GRO_MAX_SIZE (8 * 65535u); unsigned int xdp_zc_max_segs; struct netdev_queue *ingress_queue;#ifdef CONFIG_NETFILTER_INGRESS; struct nf_hook_entries *nf_hooks_ingress;#endif; unsigned char broadcast[MAX_ADDR_LEN];#ifdef CONFIG_RFS_ACCEL; struct cpu_rmap *rx_cpu_rmap;#endif; struct hlist_node index_hlist; unsigned int num_tx_queues; struct Qdisc *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq;#ifdef CONFIG_NET_SCHED; unsigned long qdisc_hash[1 << ((4) - 1)];#endif; struct timer_list watchdog_timer; int watchdog_timeo; u32 proto_down_reason; struct list_head todo_list;#ifdef CONFIG_PCPU_DEV_REFCNT; int __percpu *pcpu_refcnt;#else; refcount_t dev_refcnt;#endif; struct ref_tracker_dir refcnt_tracker; struct list_head link_watch_list; u8 reg_state; bool dismantle; bool moving_ns; bool rtnl_link_initializing; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); void *ml_priv; enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8;#if IS_ENABLED(CONFIG_GARP); struct garp_port *garp_port;#endif;#if IS_ENABLED(CONFIG_MRP); struct mrp_port *mrp_port;#endif;#if IS_ENABLED(CONFIG_NET_DROP_MONITOR); struct dm_hw_stat_delta *dm_private;#endif; struct device dev; const struct attribute_group *sysfs_groups[5]; const struct attribute_group *sysfs_rx_queue_group; const struct rtnl_link_ops *rtnl_link_ops; const struct netdev_stat_ops *stat_ops; const struct netdev_queue_mgmt_ops *queue_mgmt_ops;#define GSO_MAX_SEGS 65535u;#define GSO_LEGACY_MAX_SIZE 65536u;#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS);#define TSO_LEGACY_MAX_SIZE 65536;#define TSO_MAX_SIZE UINT_MAX; 
unsigned int tso_max_size;#define TSO_MAX_SEGS U16_MAX; u16 tso_max_segs;#ifdef CONFIG_DCB; const struct dcbnl_rtnl_ops *dcbnl_ops;#endif; u8 prio_tc_map[TC_BITMASK + 1];#if IS_ENABLED(CONFIG_FCOE); unsigned int fcoe_ddp_xid;#endif;#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO); struct netprio_map *priomap;#endif; struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool irq_affinity_auto; bool rx_cpu_rmap_auto; unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; unsigned long netns_immutable:1; unsigned long fcoe_mtu:1; struct list_head net_notifier_list;#if IS_ENABLED(CONFIG_MACSEC); const struct macsec_ops *macsec_ops;#endif; const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; struct netdev_config *cfg; struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool; struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port;#if IS_ENABLED(CONFIG_DPLL); struct dpll_pin *dpll_pin;#endif;#if IS_ENABLED(CONFIG_PAGE_POOL); struct hlist_head page_pools;#endif; struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; struct napi_config *napi_config; u32 num_napi_configs; u32 napi_defer_hard_irqs; unsigned long gro_flush_timeout; bool up; bool request_ops_lock; struct mutex lock;#if IS_ENABLED(CONFIG_NET_SHAPER); struct net_shaper_hierarchy *net_shaper_hierarchy;#endif; struct hlist_head neighbours[NEIGH_NR_TABLES]; struct hwtstamp_provider *hwprov; u8 priv[] ;};Members
priv_flagsflags invisible to userspace defined as bits, see enum netdev_priv_flags for the definitions
lltxdevice supports lockless Tx. Deprecated for real HW drivers. Mainly used by logical interfaces, such as bonding and tunnels
netmem_txdevice supports netmem_tx.
netdev_opsIncludes several pointers to callbacks,if one wants to override the ndo_*() functions
header_opsIncludes callbacks for creating, parsing, caching, etc. of Layer 2 headers.
_txArray of TX queues
gso_partial_featuresvalue(s) from NETIF_F_GSO*
real_num_tx_queuesNumber of TX queues currently active in device
gso_max_sizeMaximum size of generic segmentation offload
gso_ipv4_max_sizeMaximum size of generic segmentation offload,for IPv4.
gso_max_segsMaximum number of segments that can be passed to theNIC for GSO
num_tcNumber of traffic classes in the net device
mtuInterface MTU value
needed_headroomExtra headroom the hardware may need, but not in allcases can this be guaranteed
tc_to_txqXXX: need comments on this one
xps_mapsXXX: need comments on this one
nf_hooks_egressnetfilter hooks executed for egress packets
tcx_egressBPF & clsact qdisc specific data for egress processing
{unnamed_union}anonymous
lstatsLoopback statistics: packets, bytes
tstatsTunnel statistics: RX/TX packets, RX/TX bytes
dstatsDummy statistics: RX/TX/drop packets, RX/TX bytes
stateGeneric network queuing layer state, see netdev_state_t
flagsInterface flags (a la BSD)
hard_header_lenMaximum hardware header length.
featuresCurrently active device features
ip6_ptrIPv6 specific data
xdp_progXDP sockets filter program pointer
ptype_specificDevice-specific, protocol-specific packet handlers
ifindexinterface index
real_num_rx_queuesNumber of RX queues currently active in device
_rxArray of RX queues
gro_max_sizeMaximum size of aggregated packet in genericreceive offload (GRO)
gro_ipv4_max_sizeMaximum size of aggregated packet in genericreceive offload (GRO), for IPv4.
rx_handlerhandler for received packets
rx_handler_dataXXX: need comments on this one
nd_netNetwork namespace this network device is inside, protected by lock
npinfoXXX: need comments on this one
tcx_ingressBPF & clsact qdisc specific data for ingress processing
nameThis is the first field of the “visible” part of this structure(i.e. as seen by users in the “Space.c” file). It is the nameof the interface.
name_nodeName hashlist node
ifaliasSNMP alias
mem_endShared memory end
mem_startShared memory start
base_addrDevice I/O address
dev_listThe global list of network devices
napi_listList entry used for polling NAPI devices
unreg_listList entry when we are unregistering thedevice; see the function unregister_netdev
close_listList entry used when we are closing the device
ptype_allDevice-specific packet handlers for all protocols
adj_listDirectly linked devices, like slaves for bonding
xdp_featuresXDP capability supported by the device
xdp_metadata_opsIncludes pointers to XDP metadata callbacks.
xsk_tx_metadata_opsIncludes pointers to AF_XDP TX metadata callbacks.
gflagsGlobal flags ( kept as legacy )
needed_tailroomExtra tailroom the hardware may need, but not in allcases can this be guaranteed. Some cases also useLL_MAX_HEADER instead to allocate the skb
hw_featuresUser-changeable features
wanted_featuresUser-requested features
vlan_featuresMask of features inheritable by VLAN devices
hw_enc_featuresMask of features inherited by encapsulating devicesThis field indicates what encapsulationoffloads the hardware is capable of doing,and drivers will need to set them appropriately.
mpls_featuresMask of features inheritable by MPLS
min_mtuInterface Minimum MTU value
max_mtuInterface Maximum MTU value
typeInterface hardware type
min_header_lenMinimum hardware header length
name_assign_typenetwork interface name assignment type
groupThe group the device belongs to
statsStatistics struct, which was left as a legacy, use rtnl_link_stats64 instead
core_statscore networking counters,do not use this in drivers
carrier_up_countNumber of times the carrier has been up
carrier_down_countNumber of times the carrier has been down
wireless_handlersList of functions to handle Wireless Extensions,instead of ioctl,see <net/iw_handler.h> for details.
ethtool_opsManagement operations
l3mdev_opsLayer 3 master device operations
ndisc_opsIncludes callbacks for different IPv6 neighbourdiscovery handling. Necessary for e.g. 6LoWPAN.
xfrmdev_opsTransformation offload operations
tlsdev_opsTransport Layer Security offload operations
operstateRFC2863 operstate
link_modeMapping policy to operstate
if_portSelectable AUI, TP, ...
dmaDMA channel
perm_addrPermanent hw address
addr_assign_typeHw address assignment type
addr_lenHardware address length
upper_levelMaximum depth level of upper devices.
lower_levelMaximum depth level of lower devices.
threadednapi threaded state.
neigh_priv_lenUsed in neigh_alloc()
dev_idUsed to differentiate devices that share the same link layer address
dev_portUsed to differentiate devices that sharethe same function
irqDevice IRQ number
priv_lenSize of the ->priv flexible array
addr_list_lockXXX: need comments on this one
ucunicast mac addresses
mcmulticast mac addresses
dev_addrslist of device hw addresses
queues_ksetGroup of all Kobjects in the Tx and RX queues
unlink_listAs netif_addr_lock() can be called recursively, keep a list of interfaces to be deleted.
promiscuityNumber of times the NIC is told to work in promiscuous mode; if it becomes 0 the NIC will exit promiscuous mode
allmultiCounter, enables or disables allmulticast mode
uc_promiscCounter that indicates promiscuous mode has been enabled due to the need to listen to additional unicast addresses in a device that does not implement ndo_set_rx_mode()
nested_levelUsed as a parameter of spin_lock_nested() of dev->addr_list_lock.
ip_ptrIPv4 specific data
fib_nh_headnexthops associated with this netdev
vlan_infoVLAN info
dsa_ptrdsa specific data
tipc_ptrTIPC specific data
atalk_ptrAppleTalk link
ax25_ptrAX.25 specific data
ieee80211_ptrIEEE 802.11 specific data, assign before registering
ieee802154_ptrIEEE 802.15.4 low-rate Wireless Personal Area Networkdevice struct
mpls_ptrmpls_dev struct pointer
mctp_ptrMCTP specific data
psp_devPSP crypto device registered for this netdev
dev_addrHw address (before bcast,because most packets are unicast)
num_rx_queuesNumber of RX queues allocated at register_netdev() time
xdp_zc_max_segsMaximum number of segments supported by AF_XDP zero copy driver
ingress_queueXXX: need comments on this one
nf_hooks_ingressnetfilter hooks executed for ingress packets
broadcasthw bcast address
rx_cpu_rmapCPU reverse-mapping for RX completion interrupts,indexed by RX queue number. Assigned by driver.This must only be set if the ndo_rx_flow_steeroperation is defined
index_hlistDevice index hash chain
num_tx_queuesNumber of TX queues allocated at alloc_netdev_mq() time
qdiscRoot qdisc from userspace point of view
tx_queue_lenMax frames per queue allowed
tx_global_lockXXX: need comments on this one
xdp_bulkqXDP device bulk queue
qdisc_hashqdisc hash table
watchdog_timerList of timers
watchdog_timeoRepresents the timeout that is used by the watchdog (see dev_watchdog())
proto_down_reasonreason a netdev interface is held down
todo_listDelayed register/unregister
pcpu_refcntNumber of references to this device
dev_refcntNumber of references to this device
refcnt_trackerTracker directory for tracked references to this device
link_watch_listXXX: need comments on this one
reg_stateRegister/unregister state machine
dismantleDevice is going to be freed
moving_nsdevice is changing netns, protected by lock
rtnl_link_initializingDevice being created, suppress events
needs_free_netdevShould unregister perform free_netdev?
priv_destructorCalled from unregister
ml_privMid-layer private
ml_priv_typeMid-layer private type
pcpu_stat_typeType of device statistics which the core shouldallocate/free: none, lstats, tstats, dstats. nonemeans the driver is handling statistics allocation/freeing internally.
garp_portGARP
mrp_portMRP
dm_privateDrop monitor private
devClass/net/name entry
sysfs_groupsSpace for optional device, statistics and wirelesssysfs groups
sysfs_rx_queue_groupSpace for optional per-rx queue attributes
rtnl_link_opsRtnl_link_ops
stat_opsOptional ops for queue-aware statistics
queue_mgmt_opsOptional ops for queue management
tso_max_sizeDevice (as in HW) limit on the max TSO request size
tso_max_segsDevice (as in HW) limit on the max TSO segment count
dcbnl_opsData Center Bridging netlink ops
prio_tc_mapXXX: need comments on this one
fcoe_ddp_xidMax exchange id for FCoE LRO by ddp
priomapXXX: need comments on this one
link_topoPhysical link topology tracking attached PHYs
phydevPhysical device may attach itselffor hardware timestamping
sfp_busattached struct sfp_bus structure.
qdisc_tx_busylocklockdep class annotating Qdisc->busylock spinlock
proto_downprotocol port state information can be sent to theswitch driver and used to set the phys state of theswitch port.
irq_affinity_autodriver wants the core to store and re-assign the IRQ affinity. Set by netif_enable_irq_affinity(), then the driver must create a persistent napi by netif_napi_add_config() and finally bind the napi to IRQ (via netif_napi_set_irq()).
rx_cpu_rmap_autodriver wants the core to manage the ARFS rmap. Set by calling netif_enable_cpu_rmap().
see_all_hwtstamp_requestsdevice wants to see calls to ndo_hwtstamp_set() for all timestamp requests regardless of source, even if those aren’t HWTSTAMP_SOURCE_NETDEV
change_proto_downdevice supports setting carrier via IFLA_PROTO_DOWN
netns_immutableinterface can’t change network namespaces
fcoe_mtudevice supports maximum FCoE MTU, 2158 bytes
net_notifier_listList of per-net netdev notifier blockthat follow this device when it is movedto another network namespace.
macsec_opsMACsec offloading ops
udp_tunnel_nic_infostatic structure describing the UDP tunneloffload capabilities of the device
udp_tunnel_nicUDP tunnel offload state
cfgnet_device queue-related configuration
cfg_pendingsame as cfg but when device is being actively reconfigured includes any changes to the configuration requested by the user, but which may or may not be rejected.
ethtoolethtool related state
xdp_statestores info on attached XDP BPF programs
dev_addr_shadowCopy ofdev_addr to catch direct writes.
linkwatch_dev_trackerrefcount tracker used by linkwatch.
watchdog_dev_trackerrefcount tracker used by watchdog.
dev_registered_trackertracker for reference held whileregistered
offload_xstats_l3L3 HW stats for this netdevice.
devlink_portPointer to related devlink port structure.Assigned by a driver before netdev registration usingSET_NETDEV_DEVLINK_PORT macro. This pointer is staticduring the time netdevice is registered.
dpll_pinPointer to the SyncE source pin of a DPLL subsystem,where the clock is recovered.
page_poolspage pools created for this netdevice
irq_moderdim parameters used if IS_ENABLED(CONFIG_DIMLIB).
max_pacing_offload_horizonmax EDT offload horizon in nsec.
napi_configAn array of napi_config structures containing per-NAPIsettings.
num_napi_configsnumber of allocated NAPI config structs,always >= max(num_rx_queues, num_tx_queues).
napi_defer_hard_irqsIf not zero, provides a counter that wouldallow to avoid NIC hard IRQ, on busy queues.
gro_flush_timeouttimeout for GRO layer in NAPI
upcopy of state’s IFF_UP, but safe to read with just lock.
May report false negatives while the device is being openedor closed (lock does not protect .ndo_open, or .ndo_close).
request_ops_lockrequest the core to run all netdev_ops and ethtool_ops under the lock.
locknetdev-scope lock, protects a small selection of fields. Should always be taken using netdev_lock() / netdev_unlock() helpers. Drivers are free to use it for other protection. For the drivers that implement shaper or queue API, the scope of this lock is expanded to cover most ndo/queue/ethtool/sysfs operations. Drivers may opt-in to this behavior by setting request_ops_lock.
lock protection mixes with rtnl_lock in multiple ways, fields are either:
- simply protected by the instance lock;
- double protected - writers hold both locks, readers hold either;
- ops protected - protected by the lock held around the NDOs and other callbacks, that is the instance lock on devices for which netdev_need_ops_lock() returns true, otherwise by rtnl_lock;
- double ops protected - always protected by rtnl_lock but for devices for which netdev_need_ops_lock() returns true - also the instance lock.
- Simply protects:
gro_flush_timeout,napi_defer_hard_irqs,napi_list,net_shaper_hierarchy,reg_state,threaded
- Double protects:
up,moving_ns,nd_net,xdp_features
- Double ops protects:
real_num_rx_queues,real_num_tx_queues
- Also protects some fields in:
structnapi_struct,structnetdev_queue,structnetdev_rx_queue
Ordering: take after rtnl_lock.
net_shaper_hierarchy- data tracking the current shaper status
see include/net/net_shapers.h
neighboursList heads pointing to this device’s neighbours’dev_list, one per address-family.
hwprovTracks which PTP performs hardware packet time stamping.
privFlexible array containing private data
Description
Actually, this whole structure is a big mistake. It mixes I/O data with strictly “high-level” data, and it has to know about almost every data structure used in the INET module.
interface address info:
FIXME: cleanup struct net_device such that network protocol info moves out.
- void*netdev_priv(conststructnet_device*dev)¶
access network device private data
Parameters
conststructnet_device*devnetwork device
Description
Get network device private data
- voidnetif_napi_add(structnet_device*dev,structnapi_struct*napi,int(*poll)(structnapi_struct*,int))¶
initialize a NAPI context
Parameters
structnet_device*devnetwork device
structnapi_struct*napiNAPI context
int(*poll)(structnapi_struct*,int)polling function
Description
netif_napi_add() must be used to initialize a NAPI context prior to callingany of the other NAPI-related functions.
- voidnetif_napi_add_config(structnet_device*dev,structnapi_struct*napi,int(*poll)(structnapi_struct*,int),intindex)¶
initialize a NAPI context with persistent config
Parameters
structnet_device*devnetwork device
structnapi_struct*napiNAPI context
int(*poll)(structnapi_struct*,int)polling function
intindexthe NAPI index
- voidnetif_napi_add_tx(structnet_device*dev,structnapi_struct*napi,int(*poll)(structnapi_struct*,int))¶
initialize a NAPI context to be used for Tx only
Parameters
structnet_device*devnetwork device
structnapi_struct*napiNAPI context
int(*poll)(structnapi_struct*,int)polling function
Description
This variant of netif_napi_add() should be used from drivers using NAPI to exclusively poll a TX queue. This avoids adding it to napi_hash[], which would pollute this hash table.
- void__netif_napi_del(structnapi_struct*napi)¶
remove a NAPI context
Parameters
structnapi_struct*napiNAPI context
Description
Warning: caller must observe RCU grace period before freeing memory containing napi. Drivers might want to call this helper to combine all the needed RCU grace periods into a single one.
- voidnetif_napi_del(structnapi_struct*napi)¶
remove a NAPI context
Parameters
structnapi_struct*napiNAPI context
Description
netif_napi_del()removes a NAPI context from the network device NAPI list
- voidnetif_start_queue(structnet_device*dev)¶
allow transmit
Parameters
structnet_device*devnetwork device
Description
Allow upper layers to call the device hard_start_xmit routine.
- voidnetif_wake_queue(structnet_device*dev)¶
restart transmit
Parameters
structnet_device*devnetwork device
Description
Allow upper layers to call the device hard_start_xmit routine.Used for flow control when transmit resources are available.
- voidnetif_stop_queue(structnet_device*dev)¶
stop transmitted packets
Parameters
structnet_device*devnetwork device
Description
Stop upper layers calling the device hard_start_xmit routine.Used for flow control when transmit resources are unavailable.
- boolnetif_queue_stopped(conststructnet_device*dev)¶
test if transmit queue is flowblocked
Parameters
conststructnet_device*devnetwork device
Description
Test if transmit queue on device is currently unable to send.
- voidnetdev_queue_set_dql_min_limit(structnetdev_queue*dev_queue,unsignedintmin_limit)¶
set dql minimum limit
Parameters
structnetdev_queue*dev_queuepointer to transmit queue
unsignedintmin_limitdql minimum limit
Description
Forces xmit_more() to return true until the minimum threshold defined by min_limit is reached (or until the tx queue is empty). Warning: to be used with care, misuse will impact the latency.
- voidnetdev_txq_bql_enqueue_prefetchw(structnetdev_queue*dev_queue)¶
prefetch bql data for write
Parameters
structnetdev_queue*dev_queuepointer to transmit queue
Description
BQL enabled drivers might use this helper in theirndo_start_xmit(),to give appropriate hint to the CPU.
- voidnetdev_txq_bql_complete_prefetchw(structnetdev_queue*dev_queue)¶
prefetch bql data for write
Parameters
structnetdev_queue*dev_queuepointer to transmit queue
Description
BQL enabled drivers might use this helper in their TX completion path,to give appropriate hint to the CPU.
- voidnetdev_tx_sent_queue(structnetdev_queue*dev_queue,unsignedintbytes)¶
report the number of bytes queued to a given tx queue
Parameters
structnetdev_queue*dev_queuenetwork device queue
unsignedintbytesnumber of bytes queued to the device queue
Description
Report the number of bytes queued for sending/completion to the network device hardware queue. bytes should be a good approximation and should exactly match netdev_completed_queue() bytes. This is typically called once per packet, from ndo_start_xmit().
- voidnetdev_sent_queue(structnet_device*dev,unsignedintbytes)¶
report the number of bytes queued to hardware
Parameters
structnet_device*devnetwork device
unsignedintbytesnumber of bytes queued to the hardware device queue
Description
Report the number of bytes queued for sending/completion to the network device hardware queue#0. bytes should be a good approximation and should exactly match netdev_completed_queue() bytes. This is typically called once per packet, from ndo_start_xmit().
- voidnetdev_tx_completed_queue(structnetdev_queue*dev_queue,unsignedintpkts,unsignedintbytes)¶
report number of packets/bytes at TX completion.
Parameters
structnetdev_queue*dev_queuenetwork device queue
unsignedintpktsnumber of packets (currently ignored)
unsignedintbytesnumber of bytes dequeued from the device queue
Description
Must be called at most once per TX completion round (and not perindividual packet), so that BQL can adjust its limits appropriately.
- voidnetdev_completed_queue(structnet_device*dev,unsignedintpkts,unsignedintbytes)¶
report bytes and packets completed by device
Parameters
structnet_device*devnetwork device
unsignedintpktsactual number of packets sent over the medium
unsignedintbytesactual number of bytes sent over the medium
Description
Report the number of bytes and packets transmitted by the network device hardware queue over the physical medium; bytes must exactly match the bytes amount passed to netdev_sent_queue().
- voidnetdev_tx_reset_subqueue(conststructnet_device*dev,u32qid)¶
reset the BQL stats and state of a netdev queue
Parameters
conststructnet_device*devnetwork device
u32qidstack index of the queue to reset
- voidnetdev_reset_queue(structnet_device*dev_queue)¶
reset the packets and bytes count of a network device
Parameters
structnet_device*dev_queuenetwork device
Description
Reset the bytes and packet count of a network device and clear thesoftware flow control OFF bit for this network device
- u16netdev_cap_txqueue(structnet_device*dev,u16queue_index)¶
check if selected tx queue exceeds device queues
Parameters
structnet_device*devnetwork device
u16queue_indexgiven tx queue index
Description
Returns 0 if given tx queue index >= number of device tx queues,otherwise returns the originally passed tx queue index.
- boolnetif_running(conststructnet_device*dev)¶
test if up
Parameters
conststructnet_device*devnetwork device
Description
Test if the device has been brought up.
- voidnetif_start_subqueue(structnet_device*dev,u16queue_index)¶
allow sending packets on subqueue
Parameters
structnet_device*devnetwork device
u16queue_indexsub queue index
Description
Start individual transmit queue of a device with multiple transmit queues.
- voidnetif_stop_subqueue(structnet_device*dev,u16queue_index)¶
stop sending packets on subqueue
Parameters
structnet_device*devnetwork device
u16queue_indexsub queue index
Description
Stop individual transmit queue of a device with multiple transmit queues.
- bool__netif_subqueue_stopped(conststructnet_device*dev,u16queue_index)¶
test status of subqueue
Parameters
conststructnet_device*devnetwork device
u16queue_indexsub queue index
Description
Check individual transmit queue of a device with multiple transmit queues.
- boolnetif_subqueue_stopped(conststructnet_device*dev,structsk_buff*skb)¶
test status of subqueue
Parameters
conststructnet_device*devnetwork device
structsk_buff*skbsub queue buffer pointer
Description
Check individual transmit queue of a device with multiple transmit queues.
- voidnetif_wake_subqueue(structnet_device*dev,u16queue_index)¶
allow sending packets on subqueue
Parameters
structnet_device*devnetwork device
u16queue_indexsub queue index
Description
Resume individual transmit queue of a device with multiple transmit queues.
- boolnetif_attr_test_mask(unsignedlongj,constunsignedlong*mask,unsignedintnr_bits)¶
Test a CPU or Rx queue set in a mask
Parameters
unsignedlongjCPU/Rx queue index
constunsignedlong*maskbitmask of all cpus/rx queues
unsignedintnr_bitsnumber of bits in the bitmask
Description
Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
- boolnetif_attr_test_online(unsignedlongj,constunsignedlong*online_mask,unsignedintnr_bits)¶
Test for online CPU/Rx queue
Parameters
unsignedlongjCPU/Rx queue index
constunsignedlong*online_maskbitmask for CPUs/Rx queues that are online
unsignedintnr_bitsnumber of bits in the bitmask
Return
true if a CPU/Rx queue is online.
- unsignedintnetif_attrmask_next(intn,constunsignedlong*srcp,unsignedintnr_bits)¶
get the next CPU/Rx queue in a cpu/Rx queues mask
Parameters
intnCPU/Rx queue index
constunsignedlong*srcpthe cpumask/Rx queue mask pointer
unsignedintnr_bitsnumber of bits in the bitmask
Return
next (after n) CPU/Rx queue index in the mask;>= nr_bits if no further CPUs/Rx queues set.
- intnetif_attrmask_next_and(intn,constunsignedlong*src1p,constunsignedlong*src2p,unsignedintnr_bits)¶
get the next CPU/Rx queue in *src1p & *src2p
Parameters
intnCPU/Rx queue index
constunsignedlong*src1pthe first CPUs/Rx queues mask pointer
constunsignedlong*src2pthe second CPUs/Rx queues mask pointer
unsignedintnr_bitsnumber of bits in the bitmask
Return
next (after n) CPU/Rx queue index set in both masks;>= nr_bits if no further CPUs/Rx queues set in both.
- boolnetif_is_multiqueue(conststructnet_device*dev)¶
test if device has multiple transmit queues
Parameters
conststructnet_device*devnetwork device
Description
Check if device has multiple transmit queues
- voiddev_hold(structnet_device*dev)¶
get reference to device
Parameters
structnet_device*devnetwork device
Description
Hold reference to device to keep it from being freed.Try usingnetdev_hold() instead.
- voiddev_put(structnet_device*dev)¶
release reference to device
Parameters
structnet_device*devnetwork device
Description
Release reference to device to allow it to be freed.Try usingnetdev_put() instead.
- voidlinkwatch_sync_dev(structnet_device*dev)¶
sync linkwatch for the given device
Parameters
structnet_device*devnetwork device to sync linkwatch for
Description
Sync linkwatch for the given device, removing it from thepending work list (if queued).
- boolnetif_carrier_ok(conststructnet_device*dev)¶
test if carrier present
Parameters
conststructnet_device*devnetwork device
Description
Check if carrier is present on device
- voidnetif_dormant_on(structnet_device*dev)¶
mark device as dormant.
Parameters
structnet_device*devnetwork device
Description
Mark device as dormant (as per RFC2863).
The dormant state indicates that the relevant interface is notactually in a condition to pass packets (i.e., it is not ‘up’) but isin a “pending” state, waiting for some external event. For “on-demand” interfaces, this new state identifies the situation where theinterface is waiting for events to place it in the up state.
- voidnetif_dormant_off(structnet_device*dev)¶
set device as not dormant.
Parameters
structnet_device*devnetwork device
Description
Device is not in dormant state.
- boolnetif_dormant(conststructnet_device*dev)¶
test if device is dormant
Parameters
conststructnet_device*devnetwork device
Description
Check if device is dormant.
- voidnetif_testing_on(structnet_device*dev)¶
mark device as under test.
Parameters
structnet_device*devnetwork device
Description
Mark device as under test (as per RFC2863).
The testing state indicates that some test(s) must be performed on the interface. After completion of the test, the interface state will change to up, dormant, or down, as appropriate.
- voidnetif_testing_off(structnet_device*dev)¶
set device as not under test.
Parameters
structnet_device*devnetwork device
Description
Device is not in testing state.
- boolnetif_testing(conststructnet_device*dev)¶
test if device is under test
Parameters
conststructnet_device*devnetwork device
Description
Check if device is under test
- boolnetif_oper_up(conststructnet_device*dev)¶
test if device is operational
Parameters
conststructnet_device*devnetwork device
Description
Check if carrier is operational
- boolnetif_device_present(conststructnet_device*dev)¶
is device available or removed
Parameters
conststructnet_device*devnetwork device
Description
Check if device has not been removed from system.
- voidnetif_tx_lock(structnet_device*dev)¶
grab network device transmit lock
Parameters
structnet_device*devnetwork device
Description
Get network device transmit lock
- int__dev_uc_sync(structnet_device*dev,int(*sync)(structnet_device*,constunsignedchar*),int(*unsync)(structnet_device*,constunsignedchar*))¶
Synchronize device’s unicast list
Parameters
structnet_device*devdevice to sync
int(*sync)(structnet_device*,constunsignedchar*)function to call if address should be added
int(*unsync)(structnet_device*,constunsignedchar*)function to call if address should be removed
Description
Add newly added addresses to the interface, and releaseaddresses that have been deleted.
- void__dev_uc_unsync(structnet_device*dev,int(*unsync)(structnet_device*,constunsignedchar*))¶
Remove synchronized addresses from device
Parameters
structnet_device*devdevice to sync
int(*unsync)(structnet_device*,constunsignedchar*)function to call if address should be removed
Description
Remove all addresses that were added to the device by dev_uc_sync().
- int__dev_mc_sync(structnet_device*dev,int(*sync)(structnet_device*,constunsignedchar*),int(*unsync)(structnet_device*,constunsignedchar*))¶
Synchronize device’s multicast list
Parameters
structnet_device*devdevice to sync
int(*sync)(structnet_device*,constunsignedchar*)function to call if address should be added
int(*unsync)(structnet_device*,constunsignedchar*)function to call if address should be removed
Description
Add newly added addresses to the interface, and releaseaddresses that have been deleted.
- void__dev_mc_unsync(structnet_device*dev,int(*unsync)(structnet_device*,constunsignedchar*))¶
Remove synchronized addresses from device
Parameters
structnet_device*devdevice to sync
int(*unsync)(structnet_device*,constunsignedchar*)function to call if address should be removed
Description
Remove all addresses that were added to the device by dev_mc_sync().
- structnet_shaper¶
Represents a shaping node on the NIC H/W. Zeroed fields are considered not set.
Definition:
struct net_shaper { struct net_shaper_handle parent; struct net_shaper_handle handle; enum net_shaper_metric metric; u64 bw_min; u64 bw_max; u64 burst; u32 priority; u32 weight;};Members
parentUnique identifier for the shaper parent, usually implied
handleUnique identifier for this shaper
metricSpecifies whether the rate limits refer to PPS or BPS
bw_minMinimum guaranteed rate for this shaper
bw_maxMaximum peak rate allowed for this shaper
burstMaximum burst for the peak rate of this shaper
priorityScheduling priority for this shaper
weightScheduling weight for this shaper
- structnet_shaper_ops¶
Operations on device H/W shapers
Definition:
struct net_shaper_ops { int (*group)(struct net_shaper_binding *binding, int leaves_count, const struct net_shaper *leaves, const struct net_shaper *node, struct netlink_ext_ack *extack); int (*set)(struct net_shaper_binding *binding, const struct net_shaper *shaper, struct netlink_ext_ack *extack); int (*delete)(struct net_shaper_binding *binding, const struct net_shaper_handle *handle, struct netlink_ext_ack *extack); void (*capabilities)(struct net_shaper_binding *binding, enum net_shaper_scope scope, unsigned long *cap);};Members
groupcreate the specified shapers scheduling group
Nest the leaves shapers identified under the node shaper. All the shapers belong to the device specified by binding. The size of the leaves array is specified by leaves_count. Create the leaves and the node shaper; or, if they already exist, link them together in the desired way. The leaves scope must be NET_SHAPER_SCOPE_QUEUE.
setUpdates the specified shaper
Updates or creates theshaper on the device specified bybinding.
deleteRemoves the specified shaper
Removes the shaper configuration as identified by the givenhandleon the device specified bybinding, restoring the default behavior.
capabilitiesget the shaper features supported by the device
Fills the bitmaskcap with the supported capabilities for thespecifiedscope and device specified bybinding.
Description
The operations apply to either net_device or devlink objects. The initial shaping configuration at device initialization is empty: it does not constrain the rate in any way. The network core keeps track of the applied user-configuration in the net_device or devlink structure. The operations are serialized via a per device lock.
Devices not supporting any kind of nesting should not provide the group operation.
Each shaper is uniquely identified within the device with a ‘handle’comprising the shaper scope and a scope-specific id.
PHY Support¶
- voidphy_print_status(structphy_device*phydev)¶
Convenience function to print out the current phy status
Parameters
structphy_device*phydevthe phy_device struct
- intphy_get_rate_matching(structphy_device*phydev,phy_interface_tiface)¶
determine if rate matching is supported
Parameters
structphy_device*phydevThe phy device to return rate matching for
phy_interface_tifaceThe interface mode to use
Description
This determines the type of rate matching (if any) that phy supports using iface. iface may be PHY_INTERFACE_MODE_NA to determine if any interface supports rate matching.
Return
The type of rate matchingphy supports foriface, orRATE_MATCH_NONE.
- intphy_restart_aneg(structphy_device*phydev)¶
restart auto-negotiation
Parameters
structphy_device*phydevtarget phy_device struct
Description
Restart the autonegotiation onphydev. Returns >= 0 on success ornegative errno on error.
- intphy_aneg_done(structphy_device*phydev)¶
return auto-negotiation status
Parameters
structphy_device*phydevtarget phy_device struct
Description
Return the auto-negotiation status from this phydev. Returns > 0 on success or < 0 on error. 0 means that auto-negotiation is still pending.
- boolphy_check_valid(intspeed,intduplex,unsignedlong*features)¶
check if there is a valid PHY setting which matches speed, duplex, and feature mask
Parameters
intspeedspeed to match
intduplexduplex to match
unsignedlong*featuresA mask of the valid settings
Description
Returns true if there is a valid setting, false otherwise.
- intphy_mii_ioctl(structphy_device*phydev,structifreq*ifr,intcmd)¶
generic PHY MII ioctl interface
Parameters
structphy_device*phydevthe phy_device struct
structifreq*ifrstruct ifreq for socket ioctl’s
intcmdioctl cmd to execute
Description
Note that this function is currently incompatible with thePHYCONTROL layer. It changes registers without regard tocurrent state. Use at own risk.
- intphy_do_ioctl(structnet_device*dev,structifreq*ifr,intcmd)¶
generic ndo_eth_ioctl implementation
Parameters
structnet_device*devthe net_device struct
structifreq*ifrstruct ifreq for socket ioctl’s
intcmdioctl cmd to execute
- intphy_do_ioctl_running(structnet_device*dev,structifreq*ifr,intcmd)¶
generic ndo_eth_ioctl implementation but test first
Parameters
structnet_device*devthe net_device struct
structifreq*ifrstruct ifreq for socket ioctl’s
intcmdioctl cmd to execute
Description
Same as phy_do_ioctl, but ensures that net_device is running beforehandling the ioctl.
- voidphy_trigger_machine(structphy_device*phydev)¶
Trigger the state machine to run now
Parameters
structphy_device*phydevthe phy_device struct
- intphy_ethtool_get_strings(structphy_device*phydev,u8*data)¶
Get the statistic counter names
Parameters
structphy_device*phydevthe phy_device struct
u8*dataWhere to put the strings
- intphy_ethtool_get_sset_count(structphy_device*phydev)¶
Get the number of statistic counters
Parameters
structphy_device*phydevthe phy_device struct
- intphy_ethtool_get_stats(structphy_device*phydev,structethtool_stats*stats,u64*data)¶
Get the statistic counters
Parameters
structphy_device*phydevthe phy_device struct
structethtool_stats*statsWhat counters to get
u64*dataWhere to store the counters
- intphy_start_cable_test(structphy_device*phydev,structnetlink_ext_ack*extack)¶
Start a cable test
Parameters
structphy_device*phydevthe phy_device struct
structnetlink_ext_ack*extackextack for reporting useful error messages
- intphy_start_cable_test_tdr(structphy_device*phydev,structnetlink_ext_ack*extack,conststructphy_tdr_config*config)¶
Start a raw TDR cable test
Parameters
structphy_device*phydevthe phy_device struct
structnetlink_ext_ack*extackextack for reporting useful error messages
conststructphy_tdr_config*configConfiguration of the test to run
- unsignedintphy_inband_caps(structphy_device*phydev,phy_interface_tinterface)¶
query which in-band signalling modes are supported
Parameters
structphy_device*phydeva pointer to a struct phy_device
phy_interface_tinterfacethe interface mode for the PHY
Description
Returns zero if it is unknown what in-band signalling is supported by the PHY (e.g. because the PHY driver doesn’t implement the method). Otherwise, returns a bit mask of the LINK_INBAND_* values from enum link_inband_signalling to describe which inband modes are supported by the PHY for this interface mode.
- intphy_config_inband(structphy_device*phydev,unsignedintmodes)¶
configure the desired PHY in-band mode
Parameters
structphy_device*phydevthe phy_device struct
unsignedintmodesin-band modes to configure
Description
Disables, enables or enables-with-bypass in-band signalling between the PHY and host system.
Return
zero on success, or negative errno value.
- int_phy_start_aneg(structphy_device*phydev)¶
start auto-negotiation for this PHY device
Parameters
structphy_device*phydevthe phy_device struct
Description
Sanitizes the settings (if we’re not autonegotiating them), and then calls the driver’s config_aneg function. If the PHYCONTROL Layer is operating, we change the state to reflect the beginning of Auto-negotiation or forcing.
- intphy_start_aneg(structphy_device*phydev)¶
start auto-negotiation for this PHY device
Parameters
structphy_device*phydevthe phy_device struct
Description
Sanitizes the settings (if we’re not autonegotiating them), and then calls the driver’s config_aneg function. If the PHYCONTROL Layer is operating, we change the state to reflect the beginning of Auto-negotiation or forcing.
- intphy_speed_down(structphy_device*phydev,boolsync)¶
set speed to lowest speed supported by both link partners
Parameters
structphy_device*phydevthe phy_device struct
boolsyncperform action synchronously
Description
Typically used to save energy when waiting for a WoL packet
WARNING: Setting sync to false may cause the system to be unable to suspend in case the PHY generates an interrupt when finishing the autonegotiation. This interrupt may wake up the system immediately after suspend. Therefore use sync = false only if you’re sure it’s safe with the respective network chip.
- intphy_speed_up(structphy_device*phydev)¶
(re)set advertised speeds to all supported speeds
Parameters
structphy_device*phydevthe phy_device struct
Description
Used to revert the effect of phy_speed_down
- voidphy_start_machine(structphy_device*phydev)¶
start PHY state machine tracking
Parameters
structphy_device*phydevthe phy_device struct
Description
The PHY infrastructure can run a state machine which tracks whether the PHY is starting up, negotiating, etc. This function starts the delayed workqueue which tracks the state of the PHY. If you want to maintain your own state machine, do not call this function.
- voidphy_error(structphy_device*phydev)¶
enter ERROR state for this PHY device
Parameters
structphy_device*phydevtarget phy_device struct
Description
Moves the PHY to the ERROR state in response to a reador write error, and tells the controller the link is down.Must be called with phydev->lock held.
- voidphy_request_interrupt(structphy_device*phydev)¶
request and enable interrupt for a PHY device
Parameters
structphy_device*phydevtarget phy_device struct
Description
- Request and enable the interrupt for the given PHY.
If this fails, then we set irq to PHY_POLL.This should only be called with a valid IRQ number.
- voidphy_free_interrupt(structphy_device*phydev)¶
disable and free interrupt for a PHY device
Parameters
structphy_device*phydevtarget phy_device struct
Description
- Disable and free the interrupt for the given PHY.
This should only be called with a valid IRQ number.
- voidphy_stop(structphy_device*phydev)¶
Bring down the PHY link, and stop checking the status
Parameters
structphy_device*phydevtarget phy_device struct
- voidphy_start(structphy_device*phydev)¶
start or restart a PHY device
Parameters
structphy_device*phydevtarget phy_device struct
Description
Indicates the attached device’s readiness to handle PHY-related work. Used during startup to start the PHY, and after a call to phy_stop() to resume operation. Also used to indicate the MDIO bus has cleared an error condition.
- voidphy_mac_interrupt(structphy_device*phydev)¶
MAC says the link has changed
Parameters
structphy_device*phydevphy_device struct with changed link
Description
The MAC layer is able to indicate there has been a change in the PHY link status. Trigger the state machine and queue its work.
- intphy_loopback(structphy_device*phydev,boolenable,intspeed)¶
Configure loopback mode of PHY
Parameters
structphy_device*phydevtarget phy_device struct
boolenableenable or disable loopback mode
intspeedenable loopback mode with speed
Description
Configure loopback mode of PHY and signal link down and link up if speed ischanging.
Return
0 on success, negative error code on failure.
- intphy_eee_tx_clock_stop_capable(structphy_device*phydev)¶
indicate whether the MAC can stop tx clock
Parameters
structphy_device*phydevtarget phy_device struct
Description
Indicate whether the MAC can disable the transmit xMII clock while in LPIstate. Returns 1 if the MAC may stop the transmit clock, 0 if the MAC mustnot stop the transmit clock, or negative error.
- intphy_eee_rx_clock_stop(structphy_device*phydev,boolclk_stop_enable)¶
configure PHY receive clock in LPI
Parameters
structphy_device*phydevtarget phy_device struct
boolclk_stop_enableflag to indicate whether the clock can be stopped
Description
Configure whether the PHY can disable its receive clock during LPI mode,See IEEE 802.3 sections 22.2.2.2, 35.2.2.10, and 45.2.3.1.4.
Return
0 or negative error.
- intphy_init_eee(structphy_device*phydev,boolclk_stop_enable)¶
init and check the EEE feature
Parameters
structphy_device*phydevtarget phy_device struct
boolclk_stop_enablePHY may stop the clock during LPI
Description
Checks whether Energy-Efficient Ethernet (EEE) is supported by looking at the MMD registers 3.20 and 7.60/61, and programs the MMD register 3.0, setting the “Clock stop enable” bit if required.
- intphy_get_eee_err(structphy_device*phydev)¶
report the EEE wake error count
Parameters
structphy_device*phydevtarget phy_device struct
Description
Reports the number of times the PHY failed to complete its normal wake sequence.
- intphy_ethtool_get_eee(structphy_device*phydev,structethtool_keee*data)¶
get EEE supported and status
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_keee*dataethtool_keee data
Description
get the current EEE settings, filling in all members ofdata.
- intphy_ethtool_set_eee(structphy_device*phydev,structethtool_keee*data)¶
set EEE supported and status
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_keee*dataethtool_keee data
Description
Programs the EEE Advertisement register.
- intphy_ethtool_set_wol(structphy_device*phydev,structethtool_wolinfo*wol)¶
Configure Wake On LAN
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_wolinfo*wolConfiguration requested
- voidphy_ethtool_get_wol(structphy_device*phydev,structethtool_wolinfo*wol)¶
Get the current Wake On LAN configuration
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_wolinfo*wolStore the current configuration here
- intphy_ethtool_nway_reset(structnet_device*ndev)¶
Restart auto negotiation
Parameters
structnet_device*ndevNetwork device to restart autoneg for
- intphy_config_interrupt(structphy_device*phydev,boolinterrupts)¶
configure the PHY device for the requested interrupts
Parameters
structphy_device*phydevthe phy_device struct
boolinterruptsinterrupt flags to configure for thisphydev
Description
Returns 0 on success or < 0 on error.
- unsignedintphy_supported_speeds(structphy_device*phy,unsignedint*speeds,unsignedintsize)¶
return all speeds currently supported by a phy device
Parameters
structphy_device*phyThe phy device to return supported speeds of.
unsignedint*speedsbuffer to store supported speeds in.
unsignedintsizesize of speeds buffer.
Description
Returns the number of supported speeds, and fills the speeds buffer with the supported speeds. If the speeds buffer is too small to contain all currently supported speeds, will return as many speeds as can fit.
- voidphy_sanitize_settings(structphy_device*phydev)¶
make sure the PHY is set to supported speed and duplex
Parameters
structphy_device*phydevthe target phy_device struct
Description
Make sure the PHY is set to supported speeds and duplexes. Drop down by one in this order: 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF.
- int__phy_hwtstamp_get(structphy_device*phydev,structkernel_hwtstamp_config*config)¶
Get hardware timestamping configuration from PHY
Parameters
structphy_device*phydevthe PHY device structure
structkernel_hwtstamp_config*configstructure holding the timestamping configuration
Description
Query the PHY device for its current hardware timestamping configuration.
- int__phy_hwtstamp_set(structphy_device*phydev,structkernel_hwtstamp_config*config,structnetlink_ext_ack*extack)¶
Modify PHY hardware timestamping configuration
Parameters
structphy_device*phydevthe PHY device structure
structkernel_hwtstamp_config*configstructure holding the timestamping configuration
structnetlink_ext_ack*extacknetlink extended ack structure, for error reporting
- voidphy_queue_state_machine(structphy_device*phydev,unsignedlongjiffies)¶
Trigger the state machine to run soon
Parameters
structphy_device*phydevthe phy_device struct
unsignedlongjiffiesRun the state machine after these jiffies
- void__phy_ethtool_get_phy_stats(structphy_device*phydev,structethtool_eth_phy_stats*phy_stats,structethtool_phy_stats*phydev_stats)¶
Retrieve standardized PHY statistics
Parameters
structphy_device*phydevPointer to the PHY device
structethtool_eth_phy_stats*phy_statsPointer to ethtool_eth_phy_stats structure
structethtool_phy_stats*phydev_statsPointer to ethtool_phy_stats structure
Description
Fetches PHY statistics using a kernel-defined interface for consistentdiagnostics. Unlikephy_ethtool_get_stats(), which allows custom stats,this function enforces a standardized format for better interoperability.
- void__phy_ethtool_get_link_ext_stats(structphy_device*phydev,structethtool_link_ext_stats*link_stats)¶
Retrieve extended link statistics for a PHY
Parameters
structphy_device*phydevPointer to the PHY device
structethtool_link_ext_stats*link_statsPointer to the structure to store extended link statistics
Description
Populates the ethtool_link_ext_stats structure with link down event countsand additional driver-specific link statistics, if available.
- intphy_ethtool_get_plca_cfg(structphy_device*phydev,structphy_plca_cfg*plca_cfg)¶
Get PLCA RS configuration
Parameters
structphy_device*phydevthe phy_device struct
structphy_plca_cfg*plca_cfgwhere to store the retrieved configuration
Description
Retrieve the PLCA configuration from the PHY. Return 0 on success or anegative value if an error occurred.
- intplca_check_valid(structphy_device*phydev,conststructphy_plca_cfg*plca_cfg,structnetlink_ext_ack*extack)¶
Check PLCA configuration before enabling
Parameters
structphy_device*phydevthe phy_device struct
conststructphy_plca_cfg*plca_cfgcurrent PLCA configuration
structnetlink_ext_ack*extackextack for reporting useful error messages
Description
Checks whether the PLCA and PHY configuration are consistent and it is safeto enable PLCA. Returns 0 on success or a negative value if the PLCA or PHYconfiguration is not consistent.
- intphy_ethtool_set_plca_cfg(structphy_device*phydev,conststructphy_plca_cfg*plca_cfg,structnetlink_ext_ack*extack)¶
Set PLCA RS configuration
Parameters
structphy_device*phydevthe phy_device struct
conststructphy_plca_cfg*plca_cfgnew PLCA configuration to apply
structnetlink_ext_ack*extackextack for reporting useful error messages
Description
Sets the PLCA configuration in the PHY. Return 0 on success or anegative value if an error occurred.
- intphy_ethtool_get_plca_status(structphy_device*phydev,structphy_plca_status*plca_st)¶
Get PLCA RS status information
Parameters
structphy_device*phydevthe phy_device struct
structphy_plca_status*plca_stwhere to store the retrieved status information
Description
Retrieve the PLCA status information from the PHY. Return 0 on success or anegative value if an error occurred.
- intphy_check_link_status(structphy_device*phydev)¶
check link status and set state accordingly
Parameters
structphy_device*phydevthe phy_device struct
Description
Check for link and whether autoneg was triggered / is runningand set state accordingly
- voidphy_stop_machine(structphy_device*phydev)¶
stop the PHY state machine tracking
Parameters
structphy_device*phydevtarget phy_device struct
Description
- Stops the state machine delayed workqueue, sets the
state to UP (unless it wasn’t up yet). This function must becalled BEFORE phy_detach.
- intphy_disable_interrupts(structphy_device*phydev)¶
Disable the PHY interrupts from the PHY side
Parameters
structphy_device*phydevtarget phy_device struct
- irqreturn_tphy_interrupt(intirq,void*phy_dat)¶
PHY interrupt handler
Parameters
intirqinterrupt line
void*phy_datphy_device pointer
Description
Handle PHY interrupt
- intphy_enable_interrupts(structphy_device*phydev)¶
Enable the interrupts from the PHY side
Parameters
structphy_device*phydevtarget phy_device struct
- intphy_update_stats(structphy_device*phydev)¶
Update PHY device statistics if supported.
Parameters
structphy_device*phydevPointer to the PHY device structure.
Description
If the PHY driver provides an update_stats callback, this functioninvokes it to update the PHY statistics. If not, it returns 0.
Return
0 on success, or a negative error code if the callback fails.
- unsignedintphy_get_next_update_time(structphy_device*phydev)¶
Determine the next PHY update time
Parameters
structphy_device*phydevPointer to the phy_device structure
Description
This function queries the PHY driver to get the time for the next pollingevent. If the driver does not implement the callback, a default value isused.
Return
The time for the next polling event in jiffies
- voidphy_state_machine(structwork_struct*work)¶
Handle the state machine
Parameters
structwork_struct*workwork_struct that describes the work to be done
- voidphy_ethtool_set_eee_noneg(structphy_device*phydev,conststructeee_config*old_cfg)¶
Adjusts MAC LPI configuration without PHY renegotiation
Parameters
structphy_device*phydevpointer to the target PHY device structure
conststructeee_config*old_cfgpointer to the eee_config structure containing the old EEE settings
Description
This function updates the Energy Efficient Ethernet (EEE) configurationfor cases where only the MAC’s Low Power Idle (LPI) configuration changes,without triggering PHY renegotiation. It ensures that the MAC is properlyinformed of the new LPI settings by cycling the link down and up, whichis necessary for the MAC to adopt the new configuration. This adjustmentis done only if there is a change in the tx_lpi_enabled or tx_lpi_timerconfiguration.
- constchar*phy_speed_to_str(intspeed)¶
Return a string representing the PHY link speed
Parameters
intspeedSpeed of the link
- constchar*phy_duplex_to_str(unsignedintduplex)¶
Return string describing the duplex
Parameters
unsignedintduplexDuplex setting to describe
- constchar*phy_rate_matching_to_str(intrate_matching)¶
Return a string describing the rate matching
Parameters
intrate_matchingType of rate matching to describe
- phy_interface_tphy_fix_phy_mode_for_mac_delays(phy_interface_tinterface,boolmac_txid,boolmac_rxid)¶
Convenience function for fixing PHY mode based on whether mac adds internal delay
Parameters
phy_interface_tinterfaceThe current interface mode of the port
boolmac_txidTrue if the mac adds internal tx delay
boolmac_rxidTrue if the mac adds internal rx delay
Return
fixed PHY mode, or PHY_INTERFACE_MODE_NA if the interface cannot apply the internal delay
- intphy_interface_num_ports(phy_interface_tinterface)¶
Return the number of links that can be carried by a given MAC-PHY physical link. Returns 0 if this is unknown, the number of links else.
Parameters
phy_interface_tinterfaceThe interface mode we want to get the number of ports
- voidphy_set_max_speed(structphy_device*phydev,u32max_speed)¶
Set the maximum speed the PHY should support
Parameters
structphy_device*phydevThe phy_device struct
u32max_speedMaximum speed
Description
The PHY might be more capable than the MAC. For example, a Fast Ethernet MAC is connected to a 1G PHY. This function allows the MAC to indicate its maximum speed, and so limit what the PHY will advertise.
- voidphy_resolve_aneg_pause(structphy_device*phydev)¶
Determine pause autoneg results
Parameters
structphy_device*phydevThe phy_device struct
Description
Once autoneg has completed the local pause settings can beresolved. Determine if pause and asymmetric pause should be usedby the MAC.
- voidphy_resolve_aneg_linkmode(structphy_device*phydev)¶
resolve the advertisements into PHY settings
Parameters
structphy_device*phydevThe phy_device struct
Description
Resolve our and the link partner advertisements into their correspondingspeed and duplex. If full duplex was negotiated, extract the pause modefrom the link partner mask.
- int__phy_read_mmd(structphy_device*phydev,intdevad,u32regnum)¶
Convenience function for reading a register from an MMD on a given PHY.
Parameters
structphy_device*phydevThe phy_device struct
intdevadThe MMD to read from (0..31)
u32regnumThe register on the MMD to read (0..65535)
Description
Same rules as for__phy_read();
- intphy_read_mmd(structphy_device*phydev,intdevad,u32regnum)¶
Convenience function for reading a register from an MMD on a given PHY.
Parameters
structphy_device*phydevThe phy_device struct
intdevadThe MMD to read from
u32regnumThe register on the MMD to read
Description
Same rules as forphy_read();
- int__phy_write_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for writing a register on an MMD on a given PHY.
Parameters
structphy_device*phydevThe phy_device struct
intdevadThe MMD to write to
u32regnumThe register on the MMD to write
u16valvalue to write toregnum
Description
Same rules as for__phy_write();
- intphy_write_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for writing a register on an MMD on a given PHY.
Parameters
structphy_device*phydevThe phy_device struct
intdevadThe MMD to write to
u32regnumThe register on the MMD to write
u16valvalue to write toregnum
Description
Same rules as forphy_write();
- intphy_modify_changed(structphy_device*phydev,u32regnum,u16mask,u16set)¶
Function for modifying a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
Returns negative errno, 0 if there was no change, and 1 in case of change
- int__phy_modify(structphy_device*phydev,u32regnum,u16mask,u16set)¶
Convenience function for modifying a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intphy_modify(structphy_device*phydev,u32regnum,u16mask,u16set)¶
Convenience function for modifying a given PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- int__phy_modify_mmd_changed(structphy_device*phydev,intdevad,u32regnum,u16mask,u16set)¶
Function for modifying a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
Description
Unlocked helper function which allows a MMD register to be modified asnew register value = (old register value & ~mask) | set
Returns negative errno, 0 if there was no change, and 1 in case of change
- intphy_modify_mmd_changed(structphy_device*phydev,intdevad,u32regnum,u16mask,u16set)¶
Function for modifying a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
Returns negative errno, 0 if there was no change, and 1 in case of change
- int__phy_modify_mmd(structphy_device*phydev,intdevad,u32regnum,u16mask,u16set)¶
Convenience function for modifying a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intphy_modify_mmd(structphy_device*phydev,intdevad,u32regnum,u16mask,u16set)¶
Convenience function for modifying a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setnew value of bits set in mask to write toregnum
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intphy_save_page(structphy_device*phydev)¶
take the bus lock and save the current page
Parameters
structphy_device*phydeva pointer to a
structphy_device
Description
Take the MDIO bus lock, and return the current page number. On error,returns a negative errno.phy_restore_page() must always be calledafter this, irrespective of success or failure of this call.
- intphy_select_page(structphy_device*phydev,intpage)¶
take the bus lock, save the current page, and set a page
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintpagedesired page
Description
Take the MDIO bus lock to protect against concurrent access, save thecurrent PHY page, and set the current page. On error, returns anegative errno, otherwise returns the previous page number.phy_restore_page() must always be called after this, irrespectiveof success or failure of this call.
- intphy_restore_page(structphy_device*phydev,intoldpage,intret)¶
restore the page register and release the bus lock
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintoldpagethe old page, return value from
phy_save_page()orphy_select_page()intretoperation’s return code
Description
Release the MDIO bus lock, restoringoldpage if it is a valid page.This function propagates the earliest error code from the group ofoperations.
Return
oldpage if it was a negative value, otherwiseret if it was a negative errno value, otherwisephy_write_page()’s negative value if it were in error, otherwiseret.
- intphy_read_paged(structphy_device*phydev,intpage,u32regnum)¶
Convenience function for reading a paged register
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintpagethe page for the phy
u32regnumregister number
Description
Same rules as forphy_read().
- intphy_write_paged(structphy_device*phydev,intpage,u32regnum,u16val)¶
Convenience function for writing a paged register
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintpagethe page for the phy
u32regnumregister number
u16valvalue to write
Description
Same rules as forphy_write().
- intphy_modify_paged_changed(structphy_device*phydev,intpage,u32regnum,u16mask,u16set)¶
Function for modifying a paged register
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintpagethe page for the phy
u32regnumregister number
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Description
Returns negative errno, 0 if there was no change, and 1 in case of change
- intphy_modify_paged(structphy_device*phydev,intpage,u32regnum,u16mask,u16set)¶
Convenience function for modifying a paged register
Parameters
structphy_device*phydeva pointer to a
structphy_deviceintpagethe page for the phy
u32regnumregister number
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Description
Same rules as forphy_read() andphy_write().
- intgenphy_c45_pma_resume(structphy_device*phydev)¶
wakes up the PMA module
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_pma_suspend(structphy_device*phydev)¶
suspends the PMA module
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_pma_baset1_setup_master_slave(structphy_device*phydev)¶
configures forced master/slave role of BaseT1 devices.
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_pma_setup_forced(structphy_device*phydev)¶
configures a forced speed
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_an_config_aneg(structphy_device*phydev)¶
configure advertisement registers
Parameters
structphy_device*phydevtarget phy_device struct
Description
Configure advertisement registers based on modes set in phydev->advertising
Returns negative errno code on failure, 0 if advertisement didn’t change,or 1 if advertised modes changed.
- intgenphy_c45_an_disable_aneg(structphy_device*phydev)¶
disable auto-negotiation
Parameters
structphy_device*phydevtarget phy_device struct
Description
Disable auto-negotiation in the Clause 45 PHY. The link parametersare controlled through the PMA/PMD MMD registers.
Returns zero on success, negative errno code on failure.
- intgenphy_c45_restart_aneg(structphy_device*phydev)¶
Enable and restart auto-negotiation
Parameters
structphy_device*phydevtarget phy_device struct
Description
This assumes that the auto-negotiation MMD is present.
Enable and restart auto-negotiation.
- intgenphy_c45_check_and_restart_aneg(structphy_device*phydev,boolrestart)¶
Enable and restart auto-negotiation
Parameters
structphy_device*phydevtarget phy_device struct
boolrestartwhether aneg restart is requested
Description
This assumes that the auto-negotiation MMD is present.
Check, and restart auto-negotiation if needed.
- intgenphy_c45_aneg_done(structphy_device*phydev)¶
return auto-negotiation complete status
Parameters
structphy_device*phydevtarget phy_device struct
Description
This assumes that the auto-negotiation MMD is present.
Reads the status register from the auto-negotiation MMD, returning: positive if auto-negotiation is complete; negative errno code on error; zero otherwise.
- intgenphy_c45_read_link(structphy_device*phydev)¶
read the overall link status from the MMDs
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the link status from the specified MMDs, and if they all indicatethat the link is up, set phydev->link to 1. If an error is encountered,a negative errno will be returned, otherwise zero.
- intgenphy_c45_read_lpa(structphy_device*phydev)¶
read the link partner advertisement and pause
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,filling in the link partner advertisement, pause and asym_pause membersinphydev. This assumes that the auto-negotiation MMD is present, andthe backplane bit (7.48.0) is clear. Clause 45 PHY drivers are expectedto fill in the remainder of the link partner advert from vendor registers.
- intgenphy_c45_pma_baset1_read_master_slave(structphy_device*phydev)¶
read forced master/slave configuration
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_read_pma(structphy_device*phydev)¶
read link speed etc from PMA
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_read_mdix(structphy_device*phydev)¶
read mdix status from PMA
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_read_eee_abilities(structphy_device*phydev)¶
read supported EEE link modes
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_an_config_eee_aneg(structphy_device*phydev)¶
configure EEE advertisement
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_c45_pma_baset1_read_abilities(structphy_device*phydev)¶
read supported baset1 link modes from PMA
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the supported link modes from the extended BASE-T1 ability register
- intgenphy_c45_pma_read_ext_abilities(structphy_device*phydev)¶
read supported link modes from PMA
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the supported link modes from the PMA/PMD extended ability register(Register 1.11).
- intgenphy_c45_pma_read_abilities(structphy_device*phydev)¶
read supported link modes from PMA
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the supported link modes from the PMA Status 2 (1.8) register. If bit 1.8.9 is set, the list of supported modes is built using the values in the PMA Extended Abilities (1.11) register, indicating 1000BASET and 10G related modes. If bit 1.11.14 is set, then the list is also extended with the modes in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and 5GBASET are supported.
- intgenphy_c45_read_status(structphy_device*phydev)¶
read PHY status
Parameters
structphy_device*phydevtarget phy_device struct
Description
Reads status from PHY and sets phy_device members accordingly.
- intgenphy_c45_config_aneg(structphy_device*phydev)¶
restart auto-negotiation or forced setup
Parameters
structphy_device*phydevtarget phy_device struct
Description
- If auto-negotiation is enabled, we configure the
advertising, and then restart auto-negotiation. If it is notenabled, then we force a configuration.
- intgenphy_c45_fast_retrain(structphy_device*phydev,boolenable)¶
configure fast retrain registers
Parameters
structphy_device*phydevtarget phy_device struct
boolenableenable fast retrain or not
Description
- If fast-retrain is enabled, we configure PHY as
advertising fast retrain capable and THP Bypass Request, thenenable fast retrain. If it is not enabled, we configure fastretrain disabled.
- intgenphy_c45_plca_get_cfg(structphy_device*phydev,structphy_plca_cfg*plca_cfg)¶
get PLCA configuration from standard registers
Parameters
structphy_device*phydevtarget phy_device struct
structphy_plca_cfg*plca_cfgoutput structure to store the PLCA configuration
Description
- if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA
Management Registers specifications, this function can be used to retrievethe current PLCA configuration from the standard registers in MMD 31.
- intgenphy_c45_plca_set_cfg(structphy_device*phydev,conststructphy_plca_cfg*plca_cfg)¶
set PLCA configuration using standard registers
Parameters
structphy_device*phydevtarget phy_device struct
conststructphy_plca_cfg*plca_cfgstructure containing the PLCA configuration. Fields set to -1 arenot to be changed.
Description
- if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA
Management Registers specifications, this function can be used to modifythe PLCA configuration using the standard registers in MMD 31.
- intgenphy_c45_plca_get_status(structphy_device*phydev,structphy_plca_status*plca_st)¶
get PLCA status from standard registers
Parameters
structphy_device*phydevtarget phy_device struct
structphy_plca_status*plca_stoutput structure to store the PLCA status
Description
- if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA
Management Registers specifications, this function can be used to retrievethe current PLCA status information from the standard registers in MMD 31.
- intgenphy_c45_eee_is_active(structphy_device*phydev,unsignedlong*lp)¶
get EEE status
Parameters
structphy_device*phydevtarget phy_device struct
unsignedlong*lpvariable to store LP advertised linkmodes
Description
this function will read link partner PHY advertisementand compare it to local advertisement to return current EEE state.
- intgenphy_c45_ethtool_get_eee(structphy_device*phydev,structethtool_keee*data)¶
get EEE supported and status
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_keee*dataethtool_keee data
Description
it reports the Supported/Advertisement/LP Advertisementcapabilities.
- intgenphy_c45_ethtool_set_eee(structphy_device*phydev,structethtool_keee*data)¶
set EEE supported and status
Parameters
structphy_device*phydevtarget phy_device struct
structethtool_keee*dataethtool_keee data
Description
sets the Supported/Advertisement/LP Advertisement capabilities. If eee_enabled is false, no link modes are advertised, but the previously advertised link modes are retained. This allows EEE to be enabled/disabled in a non-destructive way. Returns either error code, 0 if there was no change, or positive value if there was a change which triggered auto-neg.
- intgenphy_c45_oatc14_cable_test_get_status(structphy_device*phydev,bool*finished)¶
Get status of OATC14 10Base-T1S PHY cable test.
Parameters
structphy_device*phydevpointer to the PHY device structure
bool*finishedpointer to a boolean set true if the test is complete
Description
Retrieves the current status of the OATC14 10Base-T1S PHY cable test.This function reads the OATC14 HDD register to determine whether the testresults are valid and whether the test has finished.
If the test is complete, the function reports the cable test result viathe ethtool cable test interface usingethnl_cable_test_result(), and thenclears the test control bit in the PHY register to reset the test state.
Return
0 on success, or a negative error code on failure (e.g. registerread/write error).
- intgenphy_c45_oatc14_cable_test_start(structphy_device*phydev)¶
Start a cable test on an OATC14 10Base-T1S PHY.
Parameters
structphy_device*phydevPointer to the PHY device structure
Description
This function initiates a cable diagnostic test on a Clause 45 OATC14 10Base-T1S capable PHY device. It first reads the PHY’s advanced diagnostic capability register to check if High Definition Diagnostics (HDD) mode is supported. If the PHY does not report HDD capability, cable testing is not supported and the function returns -EOPNOTSUPP.
For PHYs that support HDD, the function sets the appropriate control bits inthe OATC14_HDD register to enable and start the cable diagnostic test.
Return
0 on success
-EOPNOTSUPP if the PHY does not support HDD capability
A negative error code on I/O or register access failures
- intgenphy_c45_oatc14_get_sqi_max(structphy_device*phydev)¶
Get maximum supported SQI or SQI+ level of OATC14 10Base-T1S PHY
Parameters
structphy_device*phydevpointer to the PHY device structure
Description
This function returns the maximum supported Signal Quality Indicator (SQI) orSQI+ level. The SQI capability is updated on first invocation if it has notalready been updated.
Return
Maximum SQI/SQI+ level supported
Negative errno on capability read failure
- intgenphy_c45_oatc14_get_sqi(structphy_device*phydev)¶
Get Signal Quality Indicator (SQI) from an OATC14 10Base-T1S PHY
Parameters
structphy_device*phydevpointer to the PHY device structure
Description
This function reads the SQI+ or SQI value from an OATC14-compatible10Base-T1S PHY. If SQI+ capability is supported, the function returns theextended SQI+ value; otherwise, it returns the basic SQI value. The SQIcapability is updated on first invocation if it has not already been updated.
Return
SQI/SQI+ value on success
Negative errno on read failure
- enumphy_interface_t¶
Interface Mode definitions
Constants
PHY_INTERFACE_MODE_NANot Applicable - don’t touch
PHY_INTERFACE_MODE_INTERNALNo interface, MAC and PHY combined
PHY_INTERFACE_MODE_MIIMedia-independent interface
PHY_INTERFACE_MODE_GMIIGigabit media-independent interface
PHY_INTERFACE_MODE_SGMIISerial gigabit media-independent interface
PHY_INTERFACE_MODE_TBITen Bit Interface
PHY_INTERFACE_MODE_REVMIIReverse Media Independent Interface
PHY_INTERFACE_MODE_RMIIReduced Media Independent Interface
PHY_INTERFACE_MODE_REVRMIIReduced Media Independent Interface in PHY role
PHY_INTERFACE_MODE_RGMIIReduced gigabit media-independent interface
PHY_INTERFACE_MODE_RGMII_IDRGMII with Internal RX+TX delay
PHY_INTERFACE_MODE_RGMII_RXIDRGMII with Internal RX delay
PHY_INTERFACE_MODE_RGMII_TXIDRGMII with Internal TX delay
PHY_INTERFACE_MODE_RTBIReduced TBI
PHY_INTERFACE_MODE_SMIISerial MII
PHY_INTERFACE_MODE_XGMII10 gigabit media-independent interface
PHY_INTERFACE_MODE_XLGMII40 gigabit media-independent interface
PHY_INTERFACE_MODE_MOCAMultimedia over Coax
PHY_INTERFACE_MODE_PSGMIIPenta SGMII
PHY_INTERFACE_MODE_QSGMIIQuad SGMII
PHY_INTERFACE_MODE_TRGMIITurbo RGMII
PHY_INTERFACE_MODE_100BASEX100 BaseX
PHY_INTERFACE_MODE_1000BASEX1000 BaseX
PHY_INTERFACE_MODE_2500BASEX2500 BaseX
PHY_INTERFACE_MODE_5GBASER5G BaseR
PHY_INTERFACE_MODE_RXAUIReduced XAUI
PHY_INTERFACE_MODE_XAUI10 Gigabit Attachment Unit Interface
PHY_INTERFACE_MODE_10GBASER10G BaseR
PHY_INTERFACE_MODE_25GBASER25G BaseR
PHY_INTERFACE_MODE_USXGMIIUniversal Serial 10GE MII
PHY_INTERFACE_MODE_10GKR10GBASE-KR - with Clause 73 AN
PHY_INTERFACE_MODE_QUSGMIIQuad Universal SGMII
PHY_INTERFACE_MODE_1000BASEKX1000Base-KX - with Clause 73 AN
PHY_INTERFACE_MODE_10G_QXGMII10G-QXGMII - 4 ports over 10G USXGMII
PHY_INTERFACE_MODE_50GBASER50GBase-R - with Clause 134 FEC
PHY_INTERFACE_MODE_LAUI50 Gigabit Attachment Unit Interface
PHY_INTERFACE_MODE_100GBASEP100GBase-P - with Clause 134 FEC
PHY_INTERFACE_MODE_MIILITEMII-Lite - MII without RXER TXER CRS COL
PHY_INTERFACE_MODE_MAXBook keeping
Description
Describes the interface between the MAC and PHY.
- constchar*phy_modes(phy_interface_tinterface)¶
map enum phy_interface_t to device tree binding of phy-mode
Parameters
phy_interface_tinterfaceenumphy_interface_tvalue
Description
maps enumphy_interface_t defined in this fileinto the device tree binding of ‘phy-mode’, so that Ethernetdevice driver can get PHY interface from device tree.
- longrgmii_clock(intspeed)¶
map link speed to the clock rate
Parameters
intspeedlink speed value
Description
maps RGMII supported link speeds into the clock rates.This can also be used for MII, GMII, and RMII interface modes as theclock rates are identical, but the caller must be aware that errorsfor unsupported clock rates will not be signalled.
Return
clock rate or negative errno
- structmdio_bus_stats¶
Statistics counters for MDIO busses
Definition:
struct mdio_bus_stats { u64_stats_t transfers; u64_stats_t errors; u64_stats_t writes; u64_stats_t reads; struct u64_stats_sync syncp;};Members
transfersTotal number of transfers, i.e.writes +reads
errorsNumber of MDIO transfers that returned an error
writesNumber of write transfers
readsNumber of read transfers
syncpSynchronisation for incrementing statistics
- structmii_bus¶
Represents an MDIO bus
Definition:
struct mii_bus { struct module *owner; const char *name; char id[MII_BUS_ID_SIZE]; void *priv; int (*read)(struct mii_bus *bus, int addr, int regnum); int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); int (*read_c45)(struct mii_bus *bus, int addr, int devnum, int regnum); int (*write_c45)(struct mii_bus *bus, int addr, int devnum, int regnum, u16 val); int (*reset)(struct mii_bus *bus); struct mdio_bus_stats stats[PHY_MAX_ADDR]; struct mutex mdio_lock; struct device *parent; enum { MDIOBUS_ALLOCATED = 1, MDIOBUS_REGISTERED, MDIOBUS_UNREGISTERED, MDIOBUS_RELEASED, } state; struct device dev; struct mdio_device *mdio_map[PHY_MAX_ADDR]; u32 phy_mask; u32 phy_ignore_ta_mask; int irq[PHY_MAX_ADDR]; int reset_delay_us; int reset_post_delay_us; struct gpio_desc *reset_gpiod; struct mutex shared_lock;#if IS_ENABLED(CONFIG_PHY_PACKAGE); struct phy_package_shared *shared[PHY_MAX_ADDR];#endif;};Members
ownerWho owns this device
nameUser friendly name for this MDIO device, or driver name
idUnique identifier for this bus, typically from bus hierarchy
privDriver private data
readPerform a read transfer on the bus
writePerform a write transfer on the bus
read_c45Perform a C45 read transfer on the bus
write_c45Perform a C45 write transfer on the bus
resetPerform a reset of the bus
statsStatistic counters per device on the bus
mdio_lockA lock to ensure that only one thing can read/writethe MDIO bus at a time
parentParent device of this bus
stateState of bus structure
devKernel device representation
mdio_maplist of all MDIO devices on bus
phy_maskPHY addresses to be ignored when probing
phy_ignore_ta_maskPHY addresses to ignore the TA/read failure
irqAn array of interrupts, each PHY’s interrupt at the indexmatching its address
reset_delay_usGPIO reset pulse width in microseconds
reset_post_delay_usGPIO reset deassert delay in microseconds
reset_gpiodReset GPIO descriptor pointer
shared_lockprotect access to the shared element
sharedshared state across different PHYs
Description
The Bus class for PHYs. Devices which provide access toPHYs should register using this structure
Parameters
voidno arguments
Description
The internal state of the MDIO bus will be set to MDIOBUS_ALLOCATED, ready for the driver to register the bus.
- enumphy_state¶
PHY state machine states:
Constants
PHY_DOWNPHY device and driver are not ready for anything. probeshould be called if and only if the PHY is in this state,given that the PHY device exists.- PHY driver probe function will set the state toPHY_READY
PHY_READYPHY is ready to send and receive packets, but the controller is not. By default, PHYs which do not implement probe will be set to this state by phy_probe().- start will set the state to UP
PHY_HALTEDPHY is up, but no polling or interrupts are done.- phy_start moves to PHY_UP
PHY_ERRORPHY is up, but is in an error state.- phy_stop moves toPHY_HALTED
PHY_UPThe PHY and attached device are ready to do work.Interrupts should be started here.- timer moves toPHY_NOLINK orPHY_RUNNING
PHY_RUNNINGPHY is currently up, running, and possibly sendingand/or receiving packets- irq or timer will setPHY_NOLINK if link goes down- phy_stop moves toPHY_HALTED
PHY_NOLINKPHY is up, but not currently plugged in.- irq or timer will setPHY_RUNNING if link comes back- phy_stop moves toPHY_HALTED
PHY_CABLETESTPHY is performing a cable test. Packet reception/sending is not expected to work, carrier will be indicated as down. PHY will be polled once per second, or on interrupt, for its current state. Once complete, move to UP to restart the PHY.- phy_stop aborts the running test and moves to PHY_HALTED
- structphy_c45_device_ids¶
802.3-c45 Device Identifiers
Definition:
struct phy_c45_device_ids { u32 devices_in_package; u32 mmds_present; u32 device_ids[MDIO_MMD_NUM];};Members
devices_in_packageIEEE 802.3 devices in package register value.
mmds_presentbit vector of MMDs present.
device_idsThe device identifier for each present device.
- structphy_oatc14_sqi_capability¶
SQI capability information for OATC14 10Base-T1S PHY
Definition:
struct phy_oatc14_sqi_capability { bool updated; int sqi_max; u8 sqiplus_bits;};Members
updatedIndicates whether the SQI capability fields have been updated.
sqi_maxMaximum supported Signal Quality Indicator (SQI) level reported bythe PHY.
sqiplus_bitsBits for SQI+ levels supported by the PHY.0 - SQI+ is not supported3 - SQI+ is supported, using 3 bits (8 levels)4 - SQI+ is supported, using 4 bits (16 levels)5 - SQI+ is supported, using 5 bits (32 levels)6 - SQI+ is supported, using 6 bits (64 levels)7 - SQI+ is supported, using 7 bits (128 levels)8 - SQI+ is supported, using 8 bits (256 levels)
Description
This structure is used by the OATC14 10Base-T1S PHY driver to store the SQIand SQI+ capability information retrieved from the PHY.
- structphy_device¶
An instance of a PHY
Definition:
struct phy_device { struct mdio_device mdio; const struct phy_driver *drv; struct device_link *devlink; u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; unsigned is_c45:1; unsigned is_internal:1; unsigned is_pseudo_fixed_link:1; unsigned is_gigabit_capable:1; unsigned has_fixups:1; unsigned suspended:1; unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; unsigned downshifted_rate:1; unsigned is_on_sfp_module:1; unsigned mac_managed_pm:1; unsigned wol_enabled:1; unsigned is_genphy_driven:1; unsigned autoneg:1; unsigned link:1; unsigned autoneg_complete:1; bool pause:1; bool asym_pause:1; unsigned interrupts:1; unsigned irq_suspended:1; unsigned irq_rerun:1; unsigned default_timestamp:1; int rate_matching; enum phy_state state; u32 dev_flags; phy_interface_t interface; unsigned long possible_interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)]; int speed; int duplex; int port; u8 master_slave_get; u8 master_slave_set; u8 master_slave_state; unsigned long supported[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long advertising[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long lp_advertising[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long adv_old[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long supported_eee[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long advertising_eee[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long eee_disabled_modes[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; bool enable_tx_lpi; bool eee_active; struct eee_config eee_cfg; unsigned long host_interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)];#ifdef CONFIG_LED_TRIGGER_PHY; struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; struct phy_led_trigger *last_triggered; struct phy_led_trigger *led_link_trigger;#endif; struct list_head leds; int irq; void *priv;#if IS_ENABLED(CONFIG_PHY_PACKAGE); struct phy_package_shared *shared;#endif; struct sk_buff *skb; 
void *ehdr; struct nlattr *nest; struct delayed_work state_queue; struct mutex lock; bool sfp_bus_attached; struct sfp_bus *sfp_bus; struct phylink *phylink; struct net_device *attached_dev; struct mii_timestamper *mii_ts; struct pse_control *psec; u8 mdix; u8 mdix_ctrl; int pma_extable; unsigned int link_down_events; void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev);#if IS_ENABLED(CONFIG_MACSEC); const struct macsec_ops *macsec_ops;#endif; struct phy_oatc14_sqi_capability oatc14_sqi_capability;};Members
mdioMDIO bus this PHY is on
drvPointer to the driver for this PHY instance
devlinkCreate a link between phy dev and mac dev, if the external phyused by current mac interface is managed by another mac interface.
phyindexUnique id across the phy’s parent tree of phys to address the PHYfrom userspace, similar to ifindex. A zero index means the PHYwasn’t assigned an id yet.
phy_idUID for this device found during discovery
c45_ids802.3-c45 Device Identifiers if is_c45.
is_c45Set to true if this PHY uses clause 45 addressing.
is_internalSet to true if this PHY is internal to a MAC.
is_pseudo_fixed_linkSet to true if this PHY is an Ethernet switch, etc.
is_gigabit_capableSet to true if PHY supports 1000Mbps
has_fixupsSet to true if this PHY has fixups/quirks.
suspendedSet to true if this PHY has been suspended successfully.
suspended_by_mdio_busSet to true if this PHY was suspended by MDIO bus.
sysfs_linksInternal boolean tracking sysfs symbolic links setup/removal.
loopback_enabledSet true if this PHY has been loopbacked successfully.
downshifted_rateSet true if link speed has been downshifted.
is_on_sfp_moduleSet true if PHY is located on an SFP module.
mac_managed_pmSet true if MAC driver takes care of suspending/resuming PHY
wol_enabledSet to true if the PHY or the attached MAC have Wake-on-LANenabled.
is_genphy_drivenPHY is driven by one of the generic PHY drivers
autonegFlag autoneg being used
linkCurrent link state
autoneg_completeFlag auto negotiation of the link has completed
pauseCurrent pause
asym_pauseCurrent asymmetric pause
interruptsFlag interrupts have been enabled
irq_suspendedFlag indicating PHY is suspended and therefore interrupthandling shall be postponed until PHY has resumed
irq_rerunFlag indicating interrupts occurred while PHY was suspended,requiring a rerun of the interrupt handler after resume
default_timestampFlag indicating whether we are using the phytimestamp as the default one
rate_matchingCurrent rate matching mode
stateState of the PHY for management purposes
dev_flagsDevice-specific flags used by the PHY driver.
interfaceenum phy_interface_t value
possible_interfacesbitmap of interface modes that the attached PHY will switch between depending on media speed.
speedCurrent link speed
duplexCurrent duplex
portCurrent port
master_slave_getCurrent master/slave advertisement
master_slave_setUser requested master/slave configuration
master_slave_stateCurrent master/slave configuration
supportedCombined MAC/PHY supported linkmodes
advertisingCurrently advertised linkmodes
lp_advertisingCurrent link partner advertised linkmodes
adv_oldSaved advertised while power saving for WoL
supported_eeesupported PHY EEE linkmodes
advertising_eeeCurrently advertised EEE linkmodes
eee_disabled_modesEnergy efficient ethernet modes not to be advertised
enable_tx_lpiWhen True, MAC should transmit LPI to PHY
eee_activephylib private state, indicating that EEE has been negotiated
eee_cfgUser configuration of EEE
host_interfacesPHY interface modes supported by host
phy_led_triggersArray of LED triggers
phy_num_led_triggersNumber of triggers inphy_led_triggers
last_triggeredlast LED trigger for link speed
led_link_triggerLED trigger for link up/down
ledslist of PHY LED structures
irqIRQ number of the PHY’s interrupt (-1 if none)
privPointer to driver private data
sharedPointer to private data shared by phys in one package
skbNetlink message for cable diagnostics
ehdrNetlink header for cable diagnostics
nestNetlink nest used for cable diagnostics
state_queueWork queue for state machine
lockMutex for serialization access to PHY
sfp_bus_attachedFlag indicating whether the SFP bus has been attached
sfp_busSFP bus attached to this PHY’s fiber port
phylinkPointer to phylink instance for this PHY
attached_devThe attached enet driver’s device instance ptr
mii_tsPointer to time stamper callbacks
psecPointer to Power Sourcing Equipment control struct
mdixCurrent crossover
mdix_ctrlUser setting of crossover
pma_extableCached value of PMA/PMD Extended Abilities Register
link_down_eventsNumber of times link was lost
phy_link_changeCallback for phylink for notification of link change
adjust_linkCallback for the enet controller to respond to changes in the link state.
macsec_opsMACsec offloading ops.
oatc14_sqi_capabilitySQI capability information for OATC14 10Base-T1S PHY
Description
Bits [15:0] of dev_flags are free to use by the PHY driver to communicate driver specific behavior.
Bits [23:16] of dev_flags are currently reserved for future use.
Bits [31:24] of dev_flags are reserved for defining generic PHY driver behavior.
interrupts currently only supports enabled or disabled,but could be changed in the future to support enablingand disabling specific interrupts
Contains some infrastructure for polling and interrupthandling, as well as handling shifts in PHY hardware state
- structphy_tdr_config¶
Configuration of a TDR raw test
Definition:
struct phy_tdr_config { u32 first; u32 last; u32 step; s8 pair;};Members
firstDistance for first data collection point
lastDistance for last data collection point
stepStep between data collection points
pairBitmap of cable pairs to collect data for
Description
A structure containing possible configuration parametersfor a TDR cable test. The driver does not need to implementall the parameters, but should report what is actually used.All distances are in centimeters.
- enumlink_inband_signalling¶
in-band signalling modes that are supported
Constants
LINK_INBAND_DISABLEin-band signalling can be disabled
LINK_INBAND_ENABLEin-band signalling can be enabled without bypass
LINK_INBAND_BYPASSin-band signalling can be enabled with bypass
Description
The possible and required bits can only be used if the valid bit is set.If possible is clear, that means inband signalling can not be used.Required is only valid when possible is set, and means that inbandsignalling must be used.
- structphy_plca_cfg¶
Configuration of the PLCA (Physical Layer Collision Avoidance) Reconciliation Sublayer.
Definition:
struct phy_plca_cfg { int version; int enabled; int node_id; int node_cnt; int to_tmr; int burst_cnt; int burst_tmr;};Members
versionread-only PLCA register map version. Ignored when setting the configuration. Format is the same as reported by the PLCA IDVER register (31.CA00). -1 = not available.
enabledPLCA configured mode (enabled/disabled). -1 = not available / don’tset. 0 = disabled, anything else = enabled.
node_idthe PLCA local node identifier. -1 = not available / don’t set.Allowed values [0 .. 254]. 255 = node disabled.
node_cntthe PLCA node count (maximum number of nodes having a TO). Onlymeaningful for the coordinator (node_id = 0). -1 = not available / don’tset. Allowed values [1 .. 255].
to_tmrThe value of the PLCA to_timer in bit-times, which determines thePLCA transmit opportunity window opening. See IEEE802.3 Clause 148 formore details. The to_timer shall be set equal over all nodes.-1 = not available / don’t set. Allowed values [0 .. 255].
burst_cntcontrols how many additional frames a node is allowed to send insingle transmit opportunity (TO). The default value of 0 means that thenode is allowed exactly one frame per TO. A value of 1 allows two framesper TO, and so on. -1 = not available / don’t set.Allowed values [0 .. 255].
burst_tmrcontrols how many bit times to wait for the MAC to send a newframe before interrupting the burst. This value should be set to a valuegreater than the MAC inter-packet gap (which is typically 96 bits).-1 = not available / don’t set. Allowed values [0 .. 255].
Description
A structure containing configuration parameters for setting/getting the PLCARS configuration. The driver does not need to implement all the parameters,but should report what is actually used.
- structphy_plca_status¶
Status of the PLCA (Physical Layer Collision Avoidance) Reconciliation Sublayer.
Definition:
struct phy_plca_status { bool pst;};Members
pstThe PLCA status as reported by the PST bit in the PLCA STATUSregister(31.CA03), indicating BEACON activity.
Description
A structure containing status information of the PLCA RS configuration.The driver does not need to implement all the parameters, but should reportwhat is actually used.
- structphy_led¶
An LED driven by the PHY
Definition:
struct phy_led { struct list_head list; struct phy_device *phydev; struct led_classdev led_cdev; u8 index;};Members
listList of LEDs
phydevPHY this LED is attached to
led_cdevStandard LED class structure
indexNumber of the LED
- structphy_mse_capability¶
Capabilities of Mean Square Error (MSE) measurement interface
Definition:
struct phy_mse_capability { u64 max_average_mse; u64 max_peak_mse; u64 refresh_rate_ps; u64 num_symbols; u32 supported_caps;};Members
max_average_mseThe maximum value for an average MSE snapshot. Thisdefines the scale for the measurement. If the PHY_MSE_CAP_AVG capability issupported, this value MUST be greater than 0. (vendor-specific units).
max_peak_mseThe maximum value for a peak MSE snapshot. If eitherPHY_MSE_CAP_PEAK or PHY_MSE_CAP_WORST_PEAK is supported, this value MUSTbe greater than 0. (vendor-specific units).
refresh_rate_psThe typical interval, in picoseconds, between hardwareupdates of the MSE values. This is an estimate, and callers should notassume synchronous sampling. (vendor-specific units).
num_symbolsThe number of symbols aggregated per hardware sample tocalculate the MSE. (vendor-specific units).
supported_capsA bitmask of PHY_MSE_CAP_* values indicating whichmeasurement types (e.g., average, peak) and channels(e.g., per-pair or link-wide) are supported.
Description
Standardization notes:
Presence of MSE/SQI/pMSE is defined by OPEN Alliance specs, but numericscaling, refresh/update rate and aggregation windows are not fixed andare vendor-/product-specific. (OA 100BASE-T1 TC1 v1.0 6.1.*;OA 1000BASE-T1 TC12 v2.2 6.1.*)
Typical recommendations: 2^16 symbols and 0..511 scaling for MSE; pMSE onlydefined for 100BASE-T1 (sliding window example), others are vendorextensions. Drivers must report actual scale/limits here.
Describes the MSE measurement capabilities for the current link mode. Theseproperties are dynamic and may change when link settings are modified.Callers should re-query this capability after any link state change toensure they have the most up-to-date information.
Callers should only request measurements for channels and types that areindicated as supported by thesupported_caps bitmask. Ifsupported_capsis 0, the device provides no MSE diagnostics, and driver operations shouldtypically return -EOPNOTSUPP.
Snapshot values for average and peak MSE can be normalized to a 0..1 ratioby dividing the raw snapshot by the correspondingmax_average_mse ormax_peak_mse value.
- structphy_mse_snapshot¶
A snapshot of Mean Square Error (MSE) diagnostics
Definition:
struct phy_mse_snapshot { u64 average_mse; u64 peak_mse; u64 worst_peak_mse;};Members
average_mseThe average MSE value over the measurement window.OPEN Alliance references MSE as a DCQ metric; recommends 2^16 symbols and0..511 scaling. Exact scale and refresh are vendor-specific.(100BASE-T1 TC1 v1.0 6.1.1; 1000BASE-T1 TC12 v2.2 6.1.1).
peak_mseThe peak MSE value observed within the measurement window.For 100BASE-T1, “pMSE” is optional and may be implemented via a sliding128-symbol window with periodic capture; not standardized for 1000BASE-T1.(100BASE-T1 TC1 v1.0 6.1.3, Table “DCQ.peakMSE”).
worst_peak_mseA latched high-water mark of the peak MSE since last read(read-to-clear if implemented). OPEN Alliance shows a latched “worst casepeak MSE” for 100BASE-T1 pMSE; availability/semantics outside that arevendor-specific. (100BASE-T1 TC1 v1.0 6.1.3, DCQ.peakMSE high byte;1000BASE-T1 TC12 v2.2 treats DCQ details as vendor-specific.)
Description
Holds a set of MSE diagnostic values that were all captured from a singlemeasurement window.
Values are raw, device-scaled and not normalized. Usestructphy_mse_capability to interpret the scale and sampling window.
- structphy_driver¶
Driver structure for a particular PHY type
Definition:
struct phy_driver { struct mdio_driver_common mdiodrv; u32 phy_id; char *name; u32 phy_id_mask; const unsigned long * const features; u32 flags; const void *driver_data; int (*soft_reset)(struct phy_device *phydev); int (*config_init)(struct phy_device *phydev); int (*probe)(struct phy_device *phydev); int (*get_features)(struct phy_device *phydev); unsigned int (*inband_caps)(struct phy_device *phydev, phy_interface_t interface); int (*config_inband)(struct phy_device *phydev, unsigned int modes); int (*get_rate_matching)(struct phy_device *phydev, phy_interface_t iface); int (*suspend)(struct phy_device *phydev); int (*resume)(struct phy_device *phydev); int (*config_aneg)(struct phy_device *phydev); int (*aneg_done)(struct phy_device *phydev); int (*read_status)(struct phy_device *phydev); int (*config_intr)(struct phy_device *phydev); irqreturn_t (*handle_interrupt)(struct phy_device *phydev); void (*remove)(struct phy_device *phydev); int (*match_phy_device)(struct phy_device *phydev, const struct phy_driver *phydrv); int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); void (*link_change_notify)(struct phy_device *dev); int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum); int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum, u16 val); int (*read_page)(struct phy_device *dev); int (*write_page)(struct phy_device *dev, int page); int (*module_info)(struct phy_device *dev, struct ethtool_modinfo *modinfo); int (*module_eeprom)(struct phy_device *dev, struct ethtool_eeprom *ee, u8 *data); int (*cable_test_start)(struct phy_device *dev); int (*cable_test_tdr_start)(struct phy_device *dev, const struct phy_tdr_config *config); int (*cable_test_get_status)(struct phy_device *dev, bool *finished); void (*get_phy_stats)(struct phy_device *dev, struct ethtool_eth_phy_stats *eth_stats, struct ethtool_phy_stats *stats); void (*get_link_stats)(struct phy_device 
*dev, struct ethtool_link_ext_stats *link_stats); int (*update_stats)(struct phy_device *dev); int (*get_sset_count)(struct phy_device *dev); void (*get_strings)(struct phy_device *dev, u8 *data); void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); int (*get_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, void *data); int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); int (*set_loopback)(struct phy_device *dev, bool enable, int speed); int (*get_sqi)(struct phy_device *dev); int (*get_sqi_max)(struct phy_device *dev); int (*get_mse_capability)(struct phy_device *dev, struct phy_mse_capability *cap); int (*get_mse_snapshot)(struct phy_device *dev, enum phy_mse_channel channel, struct phy_mse_snapshot *snapshot); int (*get_plca_cfg)(struct phy_device *dev, struct phy_plca_cfg *plca_cfg); int (*set_plca_cfg)(struct phy_device *dev, const struct phy_plca_cfg *plca_cfg); int (*get_plca_status)(struct phy_device *dev, struct phy_plca_status *plca_st); int (*led_brightness_set)(struct phy_device *dev, u8 index, enum led_brightness value); int (*led_blink_set)(struct phy_device *dev, u8 index, unsigned long *delay_on, unsigned long *delay_off); int (*led_hw_is_supported)(struct phy_device *dev, u8 index, unsigned long rules); int (*led_hw_control_set)(struct phy_device *dev, u8 index, unsigned long rules); int (*led_hw_control_get)(struct phy_device *dev, u8 index, unsigned long *rules); int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); unsigned int (*get_next_update_time)(struct phy_device *dev);};Members
mdiodrvData common to all MDIO devices
phy_idThe result of reading the UID registers of this PHYtype, and ANDing them with the phy_id_mask. This driveronly works for PHYs with IDs which match this field
nameThe friendly name of this PHY type
phy_id_maskDefines the important bits of the phy_id
featuresA mandatory list of features (speed, duplex, etc)supported by this PHY
flagsA bitfield defining certain other features this PHYsupports (like interrupts)
driver_dataStatic driver data
soft_resetCalled to issue a PHY software reset
config_initCalled to initialize the PHY,including after a reset
probeCalled during discovery. Used to setup device-specific structures, if any
get_featuresProbe the hardware to determine whatabilities it has. Should only set phydev->supported.
inband_capsquery whether in-band is supported for the given PHY interface mode. Returns a bitmask of bits defined by enum link_inband_signalling.
config_inbandconfigure in-band mode for the PHY
get_rate_matchingGet the supported type of rate matching for a particular phy interface. This is used by phy consumers to determine whether to advertise lower-speed modes for that interface. It is assumed that if a rate matching mode is supported on an interface, then that interface’s rate can be adapted to all slower link speeds supported by the phy. If the interface is not supported, this should return RATE_MATCH_NONE.
suspendSuspend the hardware, saving state if needed
resumeResume the hardware, restoring state if needed
config_anegConfigures the advertisement and resetsautonegotiation if phydev->autoneg is on,forces the speed to the current settings in phydevif phydev->autoneg is off
aneg_doneDetermines the auto negotiation result
read_statusDetermines the negotiated speed and duplex
config_intrEnables or disables interrupts.It should also clear any pending interrupts prior to enabling theIRQs and after disabling them.
handle_interruptOverride default interrupt handling
removeClears up any memory if needed
match_phy_deviceReturns true if this is a suitabledriver for the given phydev. If NULL, matching is based onphy_id and phy_id_mask.
set_wolSome devices (e.g. qnap TS-119P II) require PHYregister changes to enable Wake on LAN, so set_wol isprovided to be called in the ethernet driver’s set_wolfunction.
get_wolSee set_wol, but for checking whether Wake on LANis enabled.
link_change_notifyCalled to inform a PHY device driverwhen the core is about to change the link state. Thiscallback is supposed to be used as fixup hook for driversthat need to take action when the link statechanges. Drivers are by no means allowed to mess with thePHY device structure in their implementations.
read_mmdPHY specific driver override for reading a MMD register. This function is optional for PHY specific drivers. When not provided, the default MMD read function will be used by phy_read_mmd(), which will use either a direct read for Clause 45 PHYs or an indirect read for Clause 22 PHYs. devnum is the MMD device number within the PHY device, regnum is the register within the selected MMD device.
write_mmdPHY specific driver override for writing a MMD register. This function is optional for PHY specific drivers. When not provided, the default MMD write function will be used by phy_write_mmd(), which will use either a direct write for Clause 45 PHYs, or an indirect write for Clause 22 PHYs. devnum is the MMD device number within the PHY device, regnum is the register within the selected MMD device. val is the value to be written.
read_pageReturn the current PHY register page number
write_pageSet the current PHY register page number
module_infoGet the size and type of the eeprom containedwithin a plug-in module
module_eepromGet the eeprom information from the plug-inmodule
cable_test_startStart a cable test
cable_test_tdr_startStart a raw TDR cable test
cable_test_get_statusOnce per second, or on interrupt,request the status of the test.
get_phy_statsRetrieve PHY statistics.dev: The PHY device for which the statistics are retrieved.eth_stats: structure where Ethernet PHY stats will be stored.stats: structure where additional PHY-specific stats will be stored.
Retrieves the supported PHY statistics and populates the providedstructures. The input structures are pre-initialized withETHTOOL_STAT_NOT_SET, and the driver must only modify memberscorresponding to supported statistics. Unmodified members will remainset toETHTOOL_STAT_NOT_SET and will not be returned to userspace.
get_link_statsRetrieve link statistics.dev: The PHY device for which the statistics are retrieved.link_stats: structure where link-specific stats will be stored.
Retrieves link-related statistics for the given PHY device. The inputstructure is pre-initialized withETHTOOL_STAT_NOT_SET, and thedriver must only modify members corresponding to supportedstatistics. Unmodified members will remain set toETHTOOL_STAT_NOT_SET and will not be returned to userspace.
update_statsTrigger periodic statistics updates.dev: The PHY device for which statistics updates are triggered.
Periodically gathers statistics from the PHY device to update locallymaintained 64-bit counters. This is necessary for PHYs that implementreduced-width counters (e.g., 16-bit or 32-bit) which can overflowmore frequently compared to 64-bit counters. By invoking thiscallback, drivers can fetch the current counter values, handleoverflow detection, and accumulate the results into local 64-bitcounters for accurate reporting through theget_phy_stats andget_link_stats interfaces.
Return: 0 on success or a negative error code on failure.
get_sset_countNumber of statistic counters
get_stringsNames of the statistic counters
get_statsReturn the statistic counter values
get_tunableReturn the value of a tunable
set_tunableSet the value of a tunable
set_loopbackSet the loopback mode of the PHYenable selects if the loopback mode is enabled or disabled. If theloopback mode is enabled, then the speed of the loopback mode can berequested with the speed argument. If the speed argument is zero,then any speed can be selected. If the speed argument is > 0, thenthis speed shall be selected for the loopback mode or EOPNOTSUPPshall be returned if speed selection is not supported.
get_sqiGet the signal quality indication
get_sqi_maxGet the maximum signal quality indication
get_mse_capabilityGet capabilities and scale of MSE measurementdev: PHY devicecap: Output (filled on success)
Fillcap with the PHY’s MSE capability for the currentlink mode: scale limits (max_average_mse, max_peak_mse), updateinterval (refresh_rate_ps), sample length (num_symbols) and thecapability bitmask (supported_caps).
Implementations may defer capability report until hardware hasconverged; in that case they should return -EAGAIN and allow thecaller to retry later.
Return: 0 on success. On failure, returns a negative errno code, suchas -EOPNOTSUPP if MSE measurement is not supported by the PHY or inthe current link mode, or -EAGAIN if the capability information isnot yet available.
get_mse_snapshotRetrieve a snapshot of MSE diagnostic valuesdev: PHY devicechannel: Channel identifier (PHY_MSE_CHANNEL_*)snapshot: Output (filled on success)
Fillsnapshot with a correlated set of MSE values from the mostrecent measurement window.
Callers must validate channel against supported_caps returned by get_mse_capability(). Drivers must not coerce channel; if the requested selector is not implemented by the device or current link mode, the operation must fail.
worst_peak_mse is latched and must be treated as read-to-clear.
Return: 0 on success. On failure, returns a negative errno code, suchas -EOPNOTSUPP if MSE measurement is not supported by the PHY or inthe current link mode, or -EAGAIN if measurements are not yetavailable.
get_plca_cfgReturn the current PLCA configuration
set_plca_cfgSet the PLCA configuration
get_plca_statusReturn the current PLCA status info
led_brightness_setSet a PHY LED brightness. Indexindicates which of the PHYs led should be set. Valuefollows the standard LED class meaning, e.g. LED_OFF,LED_HALF, LED_FULL.
led_blink_setSet a PHY LED blinking. Index indicateswhich of the PHYs led should be configured to blink. Delaysare in milliseconds and if both are zero then a sensibledefault should be chosen. The call should adjust thetimings in that case and if it can’t match the valuesspecified exactly.
led_hw_is_supportedCan the HW support the given rules.dev: PHY device which has the LEDindex: Which LED of the PHY devicerules The core is interested in these rules
Return 0 if yes, -EOPNOTSUPP if not, or an error code.
led_hw_control_setSet the HW to control the LEDdev: PHY device which has the LEDindex: Which LED of the PHY devicerules The rules used to control the LED
Returns 0, or an error code.
led_hw_control_getGet how the HW is controlling the LEDdev: PHY device which has the LEDindex: Which LED of the PHY devicerules Pointer to the rules used to control the LED
Set *rules to how the HW is currently blinking. Returns 0 on success, or an error code if the current blinking cannot be represented in rules, or some other error happens.
led_polarity_setSet the LED polarity modesdev: PHY device which has the LEDindex: Which LED of the PHY devicemodes: bitmap of LED polarity modes
Configure LED with all the required polarity modes inmodesto make it correctly turn ON or OFF.
Returns 0, or an error code.
get_next_update_timeGet the time until the next update eventdev: PHY device
Callback to determine the time (in jiffies) until the nextupdate event for the PHY state machine. Allows PHY drivers todynamically adjust polling intervals based on link state or otherconditions.
Returns the time in jiffies until the next update event.
Description
All functions are optional. If config_aneg or read_statusare not implemented, the phy core uses the genphy versions.Note that none of these functions should be called frominterrupt time. The goal is for the bus read/write functionsto be able to block when the bus transaction is happening,and be freed up by an interrupt (The MPC85xx has this ability,though it is not currently supported in the driver).
- boolphy_id_compare(u32id1,u32id2,u32mask)¶
compareid1 withid2 taking account ofmask
Parameters
u32id1first PHY ID
u32id2second PHY ID
u32maskthe PHY ID mask, set bits are significant in matching
Description
Return true if the bits fromid1 andid2 specified bymask match.This uses an equivalent test to (id &mask) == (phy_id &mask).
- boolphy_id_compare_vendor(u32id,u32vendor_mask)¶
compareid withvendor mask
Parameters
u32idPHY ID
u32vendor_maskPHY Vendor mask
Return
true if the bits fromid matchvendor using thegeneric PHY Vendor mask.
- boolphy_id_compare_model(u32id,u32model_mask)¶
compareid withmodel mask
Parameters
u32idPHY ID
u32model_maskPHY Model mask
Return
true if the bits fromid matchmodel using thegeneric PHY Model mask.
- boolphydev_id_compare(structphy_device*phydev,u32id)¶
compareid with the PHY’s Clause 22 ID
Parameters
structphy_device*phydevthe PHY device
u32idthe PHY ID to be matched
Description
Compare thephydev clause 22 ID with the providedid and return true orfalse depending whether it matches, using the bound driver mask. Thephydev must be bound to a driver.
- boolphy_is_started(structphy_device*phydev)¶
Convenience function to check whether PHY is started
Parameters
structphy_device*phydevThe phy_device struct
- boolphy_driver_is_genphy(structphy_device*phydev)¶
Convenience function to check whether PHY is driven by one of the generic PHY drivers
Parameters
structphy_device*phydevThe phy_device struct
Return
true if PHY is driven by one of the genphy drivers
- voidphy_disable_eee_mode(structphy_device*phydev,u32link_mode)¶
Don’t advertise an EEE mode.
Parameters
structphy_device*phydevThe phy_device struct
u32link_modeThe EEE mode to be disabled
- boolphy_can_wakeup(structphy_device*phydev)¶
indicate whether PHY has driver model wakeup capabilities
Parameters
structphy_device*phydevThe phy_device struct
Return
true/false depending on the PHY driver’sdevice_set_wakeup_capable()setting.
- boolphy_may_wakeup(structphy_device*phydev)¶
indicate whether PHY has wakeup enabled
Parameters
structphy_device*phydevThe phy_device struct
Return
true/false depending on the PHY driver’sdevice_set_wakeup_enabled()setting if using the driver model, otherwise the legacy determination.
- intphy_read(structphy_device*phydev,u32regnum)¶
Convenience function for reading a given PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to read
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- int__phy_read(structphy_device*phydev,u32regnum)¶
convenience function for reading a given PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to read
Description
The caller must have taken the MDIO bus lock.
- intphy_write(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for writing a given PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valvalue to write toregnum
NOTE
MUST NOT be called from interrupt context, because the bus read/write functions may wait for an interrupt to conclude the operation.
- int__phy_write(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for writing a given PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valvalue to write toregnum
Description
The caller must have taken the MDIO bus lock.
- int__phy_modify_changed(structphy_device*phydev,u32regnum,u16mask,u16set)¶
Convenience function for modifying a PHY register
Parameters
structphy_device*phydeva pointer to a struct phy_device
u32regnumregister number
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Description
Unlocked helper function which allows a PHY register to be modified as: new register value = (old register value & ~mask) | set
Returns negative errno, 0 if there was no change, and 1 in case of change
- phy_read_mmd_poll_timeout¶
phy_read_mmd_poll_timeout(phydev,devaddr,regnum,val,cond,sleep_us,timeout_us,sleep_before_read)
Periodically poll a PHY register until a condition is met or a timeout occurs
Parameters
phydevThe phy_device struct
devaddrThe MMD to read from
regnumThe register on the MMD to read
valVariable to read the register into
condBreak condition (usually involvingval)
sleep_usMaximum time to sleep between reads in us (0 tight-loops). Please read usleep_range() function description for details and limitations.
timeout_usTimeout in us, 0 means never timeout
sleep_before_readif it is true, sleep sleep_us before read.
Return
0 on success and -ETIMEDOUT upon a timeout. In either case, the last read value at args is stored in val. Must not be called from atomic context if sleep_us or timeout_us are used.
- int__phy_set_bits(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for setting bits in a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valbits to set
Description
The caller must have taken the MDIO bus lock.
- int__phy_clear_bits(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for clearing bits in a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valbits to clear
Description
The caller must have taken the MDIO bus lock.
- intphy_set_bits(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for setting bits in a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valbits to set
- intphy_clear_bits(structphy_device*phydev,u32regnum,u16val)¶
Convenience function for clearing bits in a PHY register
Parameters
structphy_device*phydevthe phy_device struct
u32regnumregister number to write
u16valbits to clear
- int__phy_set_bits_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for setting bits in a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16valbits to set
Description
The caller must have taken the MDIO bus lock.
- int__phy_clear_bits_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for clearing bits in a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16valbits to clear
Description
The caller must have taken the MDIO bus lock.
- intphy_set_bits_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for setting bits in a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16valbits to set
- intphy_clear_bits_mmd(structphy_device*phydev,intdevad,u32regnum,u16val)¶
Convenience function for clearing bits in a register on MMD
Parameters
structphy_device*phydevthe phy_device struct
intdevadthe MMD containing register to modify
u32regnumregister number to modify
u16valbits to clear
- boolphy_interrupt_is_valid(structphy_device*phydev)¶
Convenience function for testing a given PHY irq
Parameters
structphy_device*phydevthe phy_device struct
NOTE
must be kept in sync with addition/removal of PHY_POLL and PHY_MAC_INTERRUPT
- boolphy_polling_mode(structphy_device*phydev)¶
Convenience function for testing whether polling is used to detect PHY status changes
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_has_hwtstamp(structphy_device*phydev)¶
Tests whether a PHY supports time stamp configuration.
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_has_rxtstamp(structphy_device*phydev)¶
Tests whether a PHY supports receive time stamping.
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_has_tsinfo(structphy_device*phydev)¶
Tests whether a PHY reports time stamping and/or PTP hardware clock capabilities.
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_has_txtstamp(structphy_device*phydev)¶
Tests whether a PHY supports transmit time stamping.
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_is_default_hwtstamp(structphy_device*phydev)¶
Is the PHY hwtstamp the default timestamp
Parameters
structphy_device*phydevPointer to phy_device
Description
This is used to get default timestamping device taking into account the new API choice, which is selecting the timestamping from MAC by default if the phydev does not have default_timestamp flag enabled.
Return
True if phy is the default hw timestamp, false otherwise.
- boolphy_on_sfp(structphy_device*phydev)¶
Convenience function for testing if a PHY is on an SFP module
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_interface_mode_is_rgmii(phy_interface_tmode)¶
Convenience function for testing if a PHY interface mode is RGMII (all variants)
Parameters
phy_interface_tmodethe phy_interface_t enum
- boolphy_interface_mode_is_8023z(phy_interface_tmode)¶
does the PHY interface mode use 802.3z negotiation
Parameters
phy_interface_tmodeone of enum phy_interface_t
Description
Returns true if the PHY interface mode uses the 16-bit negotiation word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding)
- boolphy_interface_is_rgmii(structphy_device*phydev)¶
Convenience function for testing if a PHY interface is RGMII (all variants)
Parameters
structphy_device*phydevthe phy_device struct
- boolphy_is_pseudo_fixed_link(structphy_device*phydev)¶
Convenience function for testing if this PHY is the CPU port facing side of an Ethernet switch, or similar.
Parameters
structphy_device*phydevthe phy_device struct
- phy_module_driver¶
phy_module_driver(__phy_drivers,__count)
Helper macro for registering PHY drivers
Parameters
__phy_driversarray of PHY drivers to register
__countNumber of members in array
Description
Helper macro for PHY drivers which do not do anything special in module init/exit. Each module may only use this macro once, and calling it replaces module_init() and module_exit().
- intphy_unregister_fixup(constchar*bus_id,u32phy_uid,u32phy_uid_mask)¶
remove a phy_fixup from the list
Parameters
constchar*bus_idA string matches fixup->bus_id (or PHY_ANY_ID) in phy_fixup_list
u32phy_uidA phy id matches fixup->phy_id (or PHY_ANY_UID) in phy_fixup_list
u32phy_uid_maskApplied to phy_uid and fixup->phy_uid before comparison
- intgenphy_match_phy_device(structphy_device*phydev,conststructphy_driver*phydrv)¶
match a PHY device with a PHY driver
Parameters
structphy_device*phydevtarget phy_device struct
conststructphy_driver*phydrvtarget phy_driver struct
Description
Checks whether the given PHY device matches the specified PHY driver. For Clause 45 PHYs, iterates over the available device identifiers and compares them against the driver’s expected PHY ID, applying the provided mask. For Clause 22 PHYs, a direct ID comparison is performed.
Return
1 if the PHY device matches the driver, 0 otherwise.
- structphy_device*get_phy_device(structmii_bus*bus,intaddr,boolis_c45)¶
reads the specified PHY device and returns its phy_device struct
Parameters
structmii_bus*busthe target MII bus
intaddrPHY address on the MII bus
boolis_c45If true the PHY uses the 802.3 clause 45 protocol
Description
Probe for a PHY at addr on bus.
When probing for a clause 22 PHY, read the ID registers. If we find a valid ID, allocate and return a struct phy_device.
When probing for a clause 45 PHY, read the “devices in package” registers. If the “devices in package” appears valid, read the ID registers for each MMD, allocate and return a struct phy_device.
Returns an allocated struct phy_device on success, -ENODEV if there is no PHY present, or -EIO on bus access error.
- intphy_device_register(structphy_device*phydev)¶
Register the phy device on the MDIO bus
Parameters
structphy_device*phydevphy_device structure to be added to the MDIO bus
- voidphy_device_remove(structphy_device*phydev)¶
Remove a previously registered phy device from the MDIO bus
Parameters
structphy_device*phydevphy_device structure to remove
Description
This doesn’t free the phy_device itself, it merely reverses the effects of phy_device_register(). Use phy_device_free() to free the device after calling this function.
- intphy_get_c45_ids(structphy_device*phydev)¶
Read 802.3-c45 IDs for phy device.
Parameters
structphy_device*phydevphy_device structure to read 802.3-c45 IDs
Description
Returns zero on success, -EIO on bus access error, or -ENODEV if the “devices in package” is invalid.
- structphy_device*phy_find_next(structmii_bus*bus,structphy_device*pos)¶
finds the next PHY device on the bus
Parameters
structmii_bus*busthe target MII bus
structphy_device*poscursor
Return
next phy_device on the bus, or NULL
- intphy_connect_direct(structnet_device*dev,structphy_device*phydev,void(*handler)(structnet_device*),phy_interface_tinterface)¶
connect an ethernet device to a specific phy_device
Parameters
structnet_device*devthe network device to connect
structphy_device*phydevthe pointer to the phy device
void(*handler)(structnet_device*)callback function for state change notifications
phy_interface_tinterfacePHY device’s interface
- structphy_device*phy_connect(structnet_device*dev,constchar*bus_id,void(*handler)(structnet_device*),phy_interface_tinterface)¶
connect an ethernet device to a PHY device
Parameters
structnet_device*devthe network device to connect
constchar*bus_idthe id string of the PHY device to connect
void(*handler)(structnet_device*)callback function for state change notifications
phy_interface_tinterfacePHY device’s interface
Description
Convenience function for connecting ethernet devices to PHY devices. The default behavior is for the PHY infrastructure to handle everything, and only notify the connected driver when the link status changes. If you don’t want, or can’t use the provided functionality, you may choose to call only the subset of functions which provide the desired functionality.
- voidphy_disconnect(structphy_device*phydev)¶
disable interrupts, stop state machine, and detach a PHY device
Parameters
structphy_device*phydevtarget phy_device struct
- intphy_sfp_connect_phy(void*upstream,structphy_device*phy)¶
Connect the SFP module’s PHY to the upstream PHY
Parameters
void*upstreampointer to the upstream phy device
structphy_device*phypointer to the SFP module’s phy device
Description
This helper allows keeping track of PHY devices on the link. It adds the SFP module’s phy to the phy namespace of the upstream phy.
Return
0 on success, otherwise a negative error code.
- voidphy_sfp_disconnect_phy(void*upstream,structphy_device*phy)¶
Disconnect the SFP module’s PHY from the upstream PHY
Parameters
void*upstreampointer to the upstream phy device
structphy_device*phypointer to the SFP module’s phy device
Description
This helper allows keeping track of PHY devices on the link. It removes the SFP module’s phy from the phy namespace of the upstream phy. As the module phy will be destroyed, re-inserting the same module will add a new phy with a new index.
- voidphy_sfp_attach(void*upstream,structsfp_bus*bus)¶
attach the SFP bus to the PHY upstream network device
Parameters
void*upstreampointer to the phy device
structsfp_bus*bussfp bus representing cage being attached
Description
This is used to fill in the sfp_upstream_ops .attach member.
- voidphy_sfp_detach(void*upstream,structsfp_bus*bus)¶
detach the SFP bus from the PHY upstream network device
Parameters
void*upstreampointer to the phy device
structsfp_bus*bussfp bus representing cage being attached
Description
This is used to fill in the sfp_upstream_ops .detach member.
- intphy_sfp_probe(structphy_device*phydev,conststructsfp_upstream_ops*ops)¶
probe for a SFP cage attached to this PHY device
Parameters
structphy_device*phydevPointer to phy_device
conststructsfp_upstream_ops*opsSFP’s upstream operations
- intphy_attach_direct(structnet_device*dev,structphy_device*phydev,u32flags,phy_interface_tinterface)¶
attach a network device to a given PHY device pointer
Parameters
structnet_device*devnetwork device to attach
structphy_device*phydevPointer to phy_device to attach
u32flagsPHY device’s dev_flags
phy_interface_tinterfacePHY device’s interface
Description
Called by drivers to attach to a particular PHY device. The phy_device is found, and properly hooked up to the phy_driver. If no driver is attached, then a generic driver is used. The phy_device is given a ptr to the attaching device, and given a callback for link status change. The phy_device is returned to the attaching driver. This function takes a reference on the phy device.
- structphy_device*phy_attach(structnet_device*dev,constchar*bus_id,phy_interface_tinterface)¶
attach a network device to a particular PHY device
Parameters
structnet_device*devnetwork device to attach
constchar*bus_idBus ID of PHY device to attach
phy_interface_tinterfacePHY device’s interface
Description
Same as phy_attach_direct() except that a PHY bus_id string is passed instead of a pointer to a struct phy_device.
- voidphy_detach(structphy_device*phydev)¶
detach a PHY device from its network device
Parameters
structphy_device*phydevtarget phy_device struct
Description
This detaches the phy device from its network device and the phy driver, and drops the reference count taken in phy_attach_direct().
- intphy_reset_after_clk_enable(structphy_device*phydev)¶
perform a PHY reset if needed
Parameters
structphy_device*phydevtarget phy_device struct
Description
Some PHYs are known to need a reset after their refclk was enabled. This function evaluates the flags and performs the reset if it’s needed. Returns < 0 on error, 0 if the phy wasn’t reset and 1 if the phy was reset.
- intgenphy_setup_forced(structphy_device*phydev)¶
configures/forces speed/duplex from phydev
Parameters
structphy_device*phydevtarget phy_device struct
Description
Configures MII_BMCR to force speed/duplex to the values in phydev. Assumes that the values are valid. Please see phy_sanitize_settings().
- intgenphy_restart_aneg(structphy_device*phydev)¶
Enable and Restart Autonegotiation
Parameters
structphy_device*phydevtarget phy_device struct
- intgenphy_check_and_restart_aneg(structphy_device*phydev,boolrestart)¶
Enable and restart auto-negotiation
Parameters
structphy_device*phydevtarget phy_device struct
boolrestartwhether aneg restart is requested
Description
Check, and restart auto-negotiation if needed.
- int__genphy_config_aneg(structphy_device*phydev,boolchanged)¶
restart auto-negotiation or write BMCR
Parameters
structphy_device*phydevtarget phy_device struct
boolchangedwhether autoneg is requested
Description
If auto-negotiation is enabled, we configure the advertising, and then restart auto-negotiation. If it is not enabled, then we write the BMCR.
- intgenphy_c37_config_aneg(structphy_device*phydev)¶
restart auto-negotiation or write BMCR
Parameters
structphy_device*phydevtarget phy_device struct
Description
If auto-negotiation is enabled, we configure the advertising, and then restart auto-negotiation. If it is not enabled, then we write the BMCR. This function is intended for use with Clause 37 1000Base-X mode.
- intgenphy_aneg_done(structphy_device*phydev)¶
return auto-negotiation status
Parameters
structphy_device*phydevtarget phy_device struct
Description
Reads the status register and returns 0 either if auto-negotiation is incomplete, or if there was an error. Returns BMSR_ANEGCOMPLETE if auto-negotiation is done.
- intgenphy_update_link(structphy_device*phydev)¶
update link status in phydev
Parameters
structphy_device*phydevtarget phy_device struct
Description
Update the value in phydev->link to reflect the current link value. In order to do this, we need to read the status register twice, keeping the second value.
- intgenphy_read_status_fixed(structphy_device*phydev)¶
read the link parameters for !aneg mode
Parameters
structphy_device*phydevtarget phy_device struct
Description
Read the current duplex and speed state for a PHY operating with autonegotiation disabled.
- intgenphy_read_status(structphy_device*phydev)¶
check the link status and update current link state
Parameters
structphy_device*phydevtarget phy_device struct
Description
Check the link, then figure out the current state by comparing what we advertise with what the link partner advertises. Start by checking the gigabit possibilities, then move on to 10/100.
- intgenphy_c37_read_status(structphy_device*phydev,bool*changed)¶
check the link status and update current link state
Parameters
structphy_device*phydevtarget phy_device struct
bool*changedpointer where to store if link changed
Description
Check the link, then figure out the current state by comparing what we advertise with what the link partner advertises. This function is for Clause 37 1000Base-X mode.
If link has changed, changed is set to true, false otherwise.
- intgenphy_soft_reset(structphy_device*phydev)¶
software reset the PHY via BMCR_RESET bit
Parameters
structphy_device*phydevtarget phy_device struct
Description
Perform a software PHY reset using the standard BMCR_RESET bit and poll for the reset bit to be cleared.
Return
0 on success, < 0 on failure
- intgenphy_read_abilities(structphy_device*phydev)¶
read PHY abilities from Clause 22 registers
Parameters
structphy_device*phydevtarget phy_device struct
Description
Reads the PHY’s abilities and populates phydev->supported accordingly.
Return
0 on success, < 0 on failure
- voidphy_remove_link_mode(structphy_device*phydev,u32link_mode)¶
Remove a supported link mode
Parameters
structphy_device*phydevphy_device structure to remove link mode from
u32link_modeLink mode to be removed
Description
Some MACs don’t support all link modes which the PHY does. e.g. a 1G MAC often does not support 1000Half. Add a helper to remove a link mode.
- voidphy_advertise_supported(structphy_device*phydev)¶
Advertise all supported modes
Parameters
structphy_device*phydevtarget phy_device struct
Description
Called to advertise all supported modes, doesn’t touch pause mode advertising.
- voidphy_advertise_eee_all(structphy_device*phydev)¶
Advertise all supported EEE modes
Parameters
structphy_device*phydevtarget phy_device struct
Description
Per default phylib preserves the EEE advertising at the time of phy probing, which might be a subset of the supported EEE modes. Use this function when all supported EEE modes should be advertised. This does not trigger auto-negotiation, so must be called before phy_start()/phylink_start() which will start auto-negotiation.
- voidphy_support_eee(structphy_device*phydev)¶
Set initial EEE policy configuration
Parameters
structphy_device*phydevTarget phy_device struct
Description
This function configures the initial policy for Energy Efficient Ethernet (EEE) on the specified PHY device, influencing that EEE capabilities are advertised before the link is established. It should be called during PHY registration by the MAC driver and/or the PHY driver (for SmartEEE PHYs) if MAC supports LPI or PHY is capable to compensate missing LPI functionality of the MAC.
The function sets default EEE policy parameters, including preparing the PHY to advertise EEE capabilities based on hardware support.
It also sets the expected configuration for Low Power Idle (LPI) in the MAC driver. If the PHY framework determines that both local and remote advertisements support EEE, and the negotiated link mode is compatible with EEE, it will set enable_tx_lpi = true. The MAC driver is expected to act on this setting by enabling the LPI timer if enable_tx_lpi is set.
- voidphy_disable_eee(structphy_device*phydev)¶
Disable EEE for the PHY
Parameters
structphy_device*phydevTarget phy_device struct
Description
This function is used by MAC drivers for MACs which don’t support EEE. It disables EEE on the PHY layer.
- voidphy_support_sym_pause(structphy_device*phydev)¶
Enable support of symmetrical pause
Parameters
structphy_device*phydevtarget phy_device struct
Description
Called by the MAC to indicate it supports symmetrical Pause, but not asym pause.
- voidphy_support_asym_pause(structphy_device*phydev)¶
Enable support of asym pause
Parameters
structphy_device*phydevtarget phy_device struct
Description
Called by the MAC to indicate it supports Asym Pause.
- voidphy_set_sym_pause(structphy_device*phydev,boolrx,booltx,boolautoneg)¶
Configure symmetric Pause
Parameters
structphy_device*phydevtarget phy_device struct
boolrxReceiver Pause is supported
booltxTransmit Pause is supported
boolautonegAuto neg should be used
Description
Configure advertised Pause support depending on if receiver pause and pause auto neg is supported. Generally called from the set_pauseparam .ndo.
- voidphy_set_asym_pause(structphy_device*phydev,boolrx,booltx)¶
Configure Pause and Asym Pause
Parameters
structphy_device*phydevtarget phy_device struct
boolrxReceiver Pause is supported
booltxTransmit Pause is supported
Description
Configure advertised Pause support depending on if transmit and receiver pause is supported. If there has been a change in advertising, trigger a new autoneg. Generally called from the set_pauseparam .ndo.
- boolphy_validate_pause(structphy_device*phydev,structethtool_pauseparam*pp)¶
Test if the PHY/MAC support the pause configuration
Parameters
structphy_device*phydevphy_device struct
structethtool_pauseparam*pprequested pause configuration
Description
Test if the PHY/MAC combination supports the Pause configuration the user is requesting. Returns true if it is supported, false otherwise.
- voidphy_get_pause(structphy_device*phydev,bool*tx_pause,bool*rx_pause)¶
resolve negotiated pause modes
Parameters
structphy_device*phydevphy_device struct
bool*tx_pausepointer to bool to indicate whether transmit pause should be enabled.
bool*rx_pausepointer to bool to indicate whether receive pause should be enabled.
Description
Resolve and return the flow control modes according to the negotiation result. This includes checking that we are operating in full duplex mode. See linkmode_resolve_pause() for further details.
- s32phy_get_internal_delay(structphy_device*phydev,constint*delay_values,intsize,boolis_rx)¶
returns the index of the internal delay
Parameters
structphy_device*phydevphy_device struct
constint*delay_valuesarray of delays the PHY supports
intsizethe size of the delay array
boolis_rxboolean to indicate to get the rx internal delay
Description
Returns the index within the array of internal delay passed in. If the device property is not present then the interface type is checked; if the interface defines use of internal delay then a 1 is returned, otherwise a 0 is returned. The array must be in ascending order. If PHY does not have an ascending order array then size = 0 and the value of the delay property is returned. Return -EINVAL if the delay is invalid or cannot be found.
- intphy_get_tx_amplitude_gain(structphy_device*phydev,structdevice*dev,enumethtool_link_mode_bit_indiceslinkmode,u32*val)¶
stores tx amplitude gain in val
Parameters
structphy_device*phydevphy_device struct
structdevice*devpointer to the device’s device struct
enumethtool_link_mode_bit_indiceslinkmodelinkmode for which the tx amplitude gain should be retrieved
u32*valtx amplitude gain
Return
0 on success, < 0 on failure
- intphy_get_mac_termination(structphy_device*phydev,structdevice*dev,u32*val)¶
stores MAC termination in val
Parameters
structphy_device*phydevphy_device struct
structdevice*devpointer to the device’s device struct
u32*valMAC termination
Return
0 on success, < 0 on failure
- structmdio_device*fwnode_mdio_find_device(structfwnode_handle*fwnode)¶
Given a fwnode, find the mdio_device
Parameters
structfwnode_handle*fwnodepointer to the mdio_device’s fwnode
Description
If successful, returns a pointer to the mdio_device with the embedded struct device refcount incremented by one, or NULL on failure. The caller should call put_device() on the mdio_device after its use.
- structphy_device*fwnode_phy_find_device(structfwnode_handle*phy_fwnode)¶
For provided phy_fwnode, find phy_device.
Parameters
structfwnode_handle*phy_fwnodePointer to the phy’s fwnode.
Description
If successful, returns a pointer to the phy_device with the embedded struct device refcount incremented by one, or NULL on failure.
- structfwnode_handle*fwnode_get_phy_node(conststructfwnode_handle*fwnode)¶
Get the phy_node using the named reference.
Parameters
conststructfwnode_handle*fwnodePointer to fwnode from which phy_node has to be obtained.
Description
Refer to the return conditions of fwnode_find_reference(). For ACPI, only “phy-handle” is supported. Legacy DT properties “phy” and “phy-device” are not supported in ACPI. DT supports all three named references to the phy node.
- boolphy_uses_state_machine(structphy_device*phydev)¶
test whether consumer driver uses PAL state machine
Parameters
structphy_device*phydevthe target PHY device structure
Description
Ultimately, this aims to indirectly determine whether the PHY is attached to a consumer which uses the state machine by calling phy_start() and phy_stop().
When the PHY driver consumer uses phylib, it must have previously called phy_connect_direct() or one of its derivatives, so that phy_prepare_link() has set up a hook for monitoring state changes.
When the PHY driver is used by the MAC driver consumer through phylink (the only other provider of a phy_link_change() method), using the PHY state machine is not optional.
Return
true if consumer calls phy_start() and phy_stop(), false otherwise.
- intphy_register_fixup(constchar*bus_id,u32phy_uid,u32phy_uid_mask,int(*run)(structphy_device*))¶
creates a new phy_fixup and adds it to the list
Parameters
constchar*bus_idA string which matches phydev->mdio.dev.bus_id (or PHY_ANY_ID)
u32phy_uidUsed to match against phydev->phy_id (the UID of the PHY). It can also be PHY_ANY_UID
u32phy_uid_maskApplied to phydev->phy_id and fixup->phy_uid before comparison
int(*run)(structphy_device*)The actual code to be run when a matching PHY is found
- intget_phy_c45_ids(structmii_bus*bus,intaddr,structphy_c45_device_ids*c45_ids)¶
reads the specified addr for its 802.3-c45 IDs.
Parameters
structmii_bus*busthe target MII bus
intaddrPHY address on the MII bus
structphy_c45_device_ids*c45_idswhere to store the c45 ID information.
Description
Read the PHY “devices in package”. If this appears to be valid, read the PHY identifiers for each device. Return the “devices in package” and identifiers in c45_ids.
Returns zero on success, -EIO on bus access error, or -ENODEV if the “devices in package” is invalid or no device responds.
- intget_phy_c22_id(structmii_bus*bus,intaddr,u32*phy_id)¶
reads the specified addr for its clause 22 ID.
Parameters
structmii_bus*busthe target MII bus
intaddrPHY address on the MII bus
u32*phy_idwhere to store the ID retrieved.
Description
Read the 802.3 clause 22 PHY ID from the PHY at addr on the bus, placing it in phy_id. Return zero on successful read and the ID is valid, -EIO on bus access error, or -ENODEV if no device responds or invalid ID.
- voidphy_prepare_link(structphy_device*phydev,void(*handler)(structnet_device*))¶
prepares the PHY layer to monitor link status
Parameters
structphy_device*phydevtarget phy_device struct
void(*handler)(structnet_device*)callback function for link status change notifications
Description
Tells the PHY infrastructure to handle the gory details on monitoring link status (whether through polling or an interrupt), and to call back to the connected device driver when the link status changes. If you want to monitor your own link state, don’t call this function.
- intphy_poll_reset(structphy_device*phydev)¶
Safely wait until a PHY reset has properly completed
Parameters
structphy_device*phydevThe PHY device to poll
Description
According to IEEE 802.3, Section 2, Subsection 22.2.4.1.1, as published in 2008, a PHY reset may take up to 0.5 seconds. The MII BMCR register must be polled until the BMCR_RESET bit clears.
Furthermore, any attempts to write to PHY registers may have no effect or even generate MDIO bus errors until this is complete.
Some PHYs (such as the Marvell 88E1111) don’t entirely conform to the standard and do not fully reset after the BMCR_RESET bit is set, and may even REQUIRE a soft-reset to properly restart autonegotiation. In an effort to support such broken PHYs, this function is separate from the standard phy_init_hw() which will zero all the other bits in the BMCR and reapply all driver-specific and board-specific fixups.
- intgenphy_config_advert(structphy_device*phydev,constunsignedlong*advert)¶
sanitize and advertise auto-negotiation parameters
Parameters
structphy_device*phydevtarget phy_device struct
constunsignedlong*advertauto-negotiation parameters to advertise
Description
Writes MII_ADVERTISE with the appropriate values, after sanitizing the values to make sure we only advertise what is supported. Returns < 0 on error, 0 if the PHY’s advertisement hasn’t changed, and > 0 if it has changed.
- intgenphy_c37_config_advert(structphy_device*phydev)¶
sanitize and advertise auto-negotiation parameters
Parameters
structphy_device*phydevtarget phy_device struct
Description
Writes MII_ADVERTISE with the appropriate values, after sanitizing the values to make sure we only advertise what is supported. Returns < 0 on error, 0 if the PHY’s advertisement hasn’t changed, and > 0 if it has changed. This function is intended for Clause 37 1000Base-X mode.
Parameters
structdevice*devdevice to probe and init
Description
Take care of setting up the phy_device structure, set the state to READY.
- intphy_driver_register(structphy_driver*new_driver,structmodule*owner)¶
register a phy_driver with the PHY layer
Parameters
structphy_driver*new_drivernew phy_driver to register
structmodule*ownermodule owning this PHY
Parameters
constchar*mdio_nameThe name of a mdiobus.
Return
a reference to the mii_bus, or NULL if none found. The embedded struct device will have its reference count incremented, and this must be put_device’ed once the bus is finished with.
- structmii_bus*of_mdio_find_bus(structdevice_node*mdio_bus_np)¶
Given an mii_bus node, find the mii_bus.
Parameters
structdevice_node*mdio_bus_npPointer to the mii_bus.
Return
a reference to the mii_bus, or NULL if none found. The embedded struct device will have its reference count incremented, and this must be put once the bus is finished with.
Description
Because the association of a device_node and mii_bus is made via of_mdiobus_register(), the mii_bus cannot be found before it is registered with of_mdiobus_register().
- int__mdiobus_read(structmii_bus*bus,intaddr,u32regnum)¶
Unlocked version of the mdiobus_read function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
Description
Read a MDIO bus register. Caller must hold the mdio bus lock.
NOTE
MUST NOT be called from interrupt context.
- int__mdiobus_write(structmii_bus*bus,intaddr,u32regnum,u16val)¶
Unlocked version of the mdiobus_write function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
Description
Write a MDIO bus register. Caller must hold the mdio bus lock.
NOTE
MUST NOT be called from interrupt context.
- int__mdiobus_modify_changed(structmii_bus*bus,intaddr,u32regnum,u16mask,u16set)¶
Unlocked version of the mdiobus_modify_changed function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
1 if the register was modified, 0 if no change was needed,negative on any error condition
Description
Read, modify, and if any change, write the register value back to thedevice.
NOTE
MUST NOT be called from interrupt context.
- int__mdiobus_c45_read(structmii_bus*bus,intaddr,intdevad,u32regnum)¶
Unlocked version of the mdiobus_c45_read function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
Description
Read a MDIO bus register. Caller must hold the mdio bus lock.
NOTE
MUST NOT be called from interrupt context.
- int__mdiobus_c45_write(structmii_bus*bus,intaddr,intdevad,u32regnum,u16val)¶
Unlocked version of the mdiobus_c45_write function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to write
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
Description
Write a MDIO bus register. Caller must hold the mdio bus lock.
NOTE
MUST NOT be called from interrupt context.
- intmdiobus_read_nested(structmii_bus*bus,intaddr,u32regnum)¶
Nested version of the mdiobus_read function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
Description
In case of nested MDIO bus access avoid lockdep false positives byusingmutex_lock_nested().
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_read(structmii_bus*bus,intaddr,u32regnum)¶
Convenience function for reading a given MII mgmt register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_c45_read(structmii_bus*bus,intaddr,intdevad,u32regnum)¶
Convenience function for reading a given MII mgmt register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_c45_read_nested(structmii_bus*bus,intaddr,intdevad,u32regnum)¶
Nested version of the mdiobus_c45_read function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to read
Return
The register value if successful, negative error code on failure
Description
In case of nested MDIO bus access avoid lockdep false positives byusingmutex_lock_nested().
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_write_nested(structmii_bus*bus,intaddr,u32regnum,u16val)¶
Nested version of the mdiobus_write function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
Description
In case of nested MDIO bus access avoid lockdep false positives byusingmutex_lock_nested().
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_write(structmii_bus*bus,intaddr,u32regnum,u16val)¶
Convenience function for writing a given MII mgmt register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_c45_write(structmii_bus*bus,intaddr,intdevad,u32regnum,u16val)¶
Convenience function for writing a given MII mgmt register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to write
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_c45_write_nested(structmii_bus*bus,intaddr,intdevad,u32regnum,u16val)¶
Nested version of the mdiobus_c45_write function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to write
u32regnumregister number to write
u16valvalue to write toregnum
Return
Zero if successful, negative error code on failure
Description
In case of nested MDIO bus access avoid lockdep false positives byusingmutex_lock_nested().
NOTE
MUST NOT be called from interrupt context,because the bus read/write functions may wait for an interruptto conclude the operation.
- intmdiobus_modify(structmii_bus*bus,intaddr,u32regnum,u16mask,u16set)¶
Convenience function for modifying a given mdio device register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to write
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
0 on success, negative on any error condition
- intmdiobus_c45_modify(structmii_bus*bus,intaddr,intdevad,u32regnum,u16mask,u16set)¶
Convenience function for modifying a given mdio device register
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to write
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
0 on success, negative on any error condition
- intmdiobus_modify_changed(structmii_bus*bus,intaddr,u32regnum,u16mask,u16set)¶
Convenience function for modifying a given mdio device register and returning if it changed
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
u32regnumregister number to write
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
1 if the register was modified, 0 if no change was needed,negative on any error condition
- intmdiobus_c45_modify_changed(structmii_bus*bus,intaddr,intdevad,u32regnum,u16mask,u16set)¶
Convenience function for modifying a given mdio device register and returning if it changed
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to write
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
1 if the register was modified, 0 if no change was needed,negative on any error condition
Parameters
structdevice*dthe target
structdevicethat contains the mii_bus
Description
called when the last reference to an mii_bus isdropped, to free the underlying memory.
- int__mdiobus_c45_modify_changed(structmii_bus*bus,intaddr,intdevad,u32regnum,u16mask,u16set)¶
Unlocked version of the mdiobus_c45_modify_changed function
Parameters
structmii_bus*busthe mii_bus struct
intaddrthe phy address
intdevaddevice address to read
u32regnumregister number to modify
u16maskbit mask of bits to clear
u16setbit mask of bits to set
Return
1 if the register was modified, 0 if no change was needed,negative on any error condition
Description
Read, modify, and if any change, write the register value back to thedevice. Any error returns a negative number.
NOTE
MUST NOT be called from interrupt context.
- intmdio_bus_match(structdevice*dev,conststructdevice_driver*drv)¶
determine if given MDIO driver supports the given MDIO device
Parameters
structdevice*devtarget MDIO device
conststructdevice_driver*drvgiven MDIO driver
Return
1 if the driver supports the device, 0 otherwise
Description
- This may require calling the device’s own match function,
since different classes of MDIO devices have different match criteria.
PHYLINK¶
PHYLINK interfaces traditional network drivers with PHYLIB, fixed-links,and SFF modules (eg, hot-pluggable SFP) that may contain PHYs. PHYLINKprovides management of the link state and link modes.
- structphylink_link_state¶
link state structure
Definition:
struct phylink_link_state { unsigned long advertising[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; unsigned long lp_advertising[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; phy_interface_t interface; int speed; int duplex; int pause; int rate_matching; unsigned int link:1; unsigned int an_complete:1;};Members
advertisingethtool bitmask containing advertised link modes
lp_advertisingethtool bitmask containing link partner advertised linkmodes
interfacelink
typedefphy_interface_tmodespeedlink speed, one of the SPEED_* constants.
duplexlink duplex mode, one of DUPLEX_* constants.
pauselink pause state, described by MLO_PAUSE_* constants.
rate_matchingrate matching being performed, one of the RATE_MATCH_*constants. If rate matching is taking place, then the speed/duplex ofthe medium link mode (speed andduplex) and the speed/duplex of the phyinterface mode (interface) are different.
linktrue if the link is up.
an_completetrue if autonegotiation has completed.
- structphylink_config¶
PHYLINK configuration structure
Definition:
struct phylink_config { struct device *dev; enum phylink_op_type type; bool poll_fixed_state; bool mac_managed_pm; bool mac_requires_rxc; bool default_an_inband; bool eee_rx_clk_stop_enable; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); unsigned long supported_interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)]; unsigned long lpi_interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)]; unsigned long mac_capabilities; unsigned long lpi_capabilities; u32 lpi_timer_default; bool eee_enabled_default; bool wol_phy_legacy; bool wol_phy_speed_ctrl; u32 wol_mac_support;};Members
deva pointer to a
structdeviceassociated with the MACtypeoperation type of PHYLINK instance
poll_fixed_stateif true, starts link_poll,if MAC link is at
MLO_AN_FIXEDmode.mac_managed_pmif true, indicate the MAC driver is responsible for PHY PM.
mac_requires_rxcif true, the MAC always requires a receive clock from PHY.The PHY driver should start the clock signal as soon aspossible and avoid stopping it during suspend events.
default_an_inbandif true, defaults to MLO_AN_INBAND rather thanMLO_AN_PHY. A fixed-link specification will override.
eee_rx_clk_stop_enableif true, PHY can stop the receive clock during LPI
get_fixed_statecallback to execute to determine the fixed link state,if MAC link is at
MLO_AN_FIXEDmode.supported_interfacesbitmap describing which PHY_INTERFACE_MODE_xxxare supported by the MAC/PCS.
lpi_interfacesbitmap describing which PHY interface modes can supportLPI signalling.
mac_capabilitiesMAC pause/speed/duplex capabilities.
lpi_capabilitiesMAC speeds which can support LPI signalling
lpi_timer_defaultDefault EEE LPI timer setting.
eee_enabled_defaultIf set, EEE will be enabled by phylink at creation time
wol_phy_legacyUse Wake-on-Lan with PHY even if
phy_can_wakeup()is falsewol_phy_speed_ctrlUse phy speed control on suspend/resume
wol_mac_supportBitmask of MAC supported
WAKE_*options
- structphylink_mac_ops¶
MAC operations structure.
Definition:
struct phylink_mac_ops { unsigned long (*mac_get_caps)(struct phylink_config *config, phy_interface_t interface); struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, phy_interface_t interface); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); int (*mac_finish)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); void (*mac_link_up)(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, int duplex, bool tx_pause, bool rx_pause); void (*mac_disable_tx_lpi)(struct phylink_config *config); int (*mac_enable_tx_lpi)(struct phylink_config *config, u32 timer, bool tx_clk_stop); int (*mac_wol_set)(struct phylink_config *config, u32 wolopts, const u8 *sopass);};Members
mac_get_capsGet MAC capabilities for interface mode.
mac_select_pcsSelect a PCS for the interface mode.
mac_prepareprepare for a major reconfiguration of the interface.
mac_configconfigure the MAC for the selected mode and state.
mac_finishfinish a major reconfiguration of the interface.
mac_link_downtake the link down.
mac_link_upallow the link to come up.
mac_disable_tx_lpidisable LPI.
mac_enable_tx_lpienable and configure LPI.
mac_wol_setconfigure Wake-on-Lan settings at the MAC.
Description
The individual methods are described more fully below.
- unsignedlongmac_get_caps(structphylink_config*config,phy_interface_tinterface)¶
Get MAC capabilities for interface mode.
Parameters
structphylink_config*configa pointer to a
structphylink_config.phy_interface_tinterfacePHY interface mode.
Description
Optional method. When not provided, config->mac_capabilities will be used.When implemented, this returns the MAC capabilities for the specifiedinterface mode where there is some special handling required by the MACdriver (e.g. not supporting half-duplex in certain interface modes.)
- structphylink_pcs*mac_select_pcs(structphylink_config*config,phy_interface_tinterface)¶
Select a PCS for the interface mode.
Parameters
structphylink_config*configa pointer to a
structphylink_config.phy_interface_tinterfacePHY interface mode for PCS
Description
Return thestructphylink_pcs for the specified interface mode, orNULL if none is required, or an error pointer on error.
This must not modify any state. It is used to query which PCS shouldbe used. Phylink will use this during validation to ensure that theconfiguration is valid, and when setting a configuration to internallyset the PCS that will be used.
- intmac_prepare(structphylink_config*config,unsignedintmode,phy_interface_tiface)¶
prepare to change the PHY interface mode
Parameters
structphylink_config*configa pointer to a
structphylink_config.unsignedintmodeone of
MLO_AN_FIXED,MLO_AN_PHY,MLO_AN_INBAND.phy_interface_tifaceinterface mode to switch to
Description
phylink will call this method at the beginning of a full initialisationof the link, which includes changing the interface mode or at initialstartup time. It may be called for the current mode. The MAC drivershould perform whatever actions are required, e.g. disabling theSerdes PHY.
This will be the first call in the sequence:-mac_prepare()-mac_config()-pcs_config()- possiblepcs_an_restart()-mac_finish()
Returns zero on success, or negative errno on failure which will bereported to the kernel log.
- voidmac_config(structphylink_config*config,unsignedintmode,conststructphylink_link_state*state)¶
configure the MAC for the selected mode and state
Parameters
structphylink_config*configa pointer to a
structphylink_config.unsignedintmodeone of
MLO_AN_FIXED,MLO_AN_PHY,MLO_AN_INBAND.conststructphylink_link_state*statea pointer to a
structphylink_link_state.
Description
Note - not all members ofstate are valid. In particular,state->lp_advertising,state->link,state->an_complete are neverguaranteed to be correct, and so anymac_config() implementation mustnever reference these fields.
This will only be called to reconfigure the MAC for a “major” change ine.g. interface mode. It will not be called for changes in speed, duplexor pause modes or to change the in-band advertisement.
In all negotiation modes, as defined bymode,state->pause indicates thepause settings which should be applied as follows. IfMLO_PAUSE_AN is notset,MLO_PAUSE_TX andMLO_PAUSE_RX indicate whether the MAC should sendpause frames and/or act on received pause frames respectively. Otherwise,the results of in-band negotiation/status from the MAC PCS should be usedto control the MAC pause mode settings.
The action performed depends on the currently selected mode:
MLO_AN_FIXED,MLO_AN_PHY:Configure for non-inband negotiation mode, where the link settingsare completely communicated via
mac_link_up(). The physical linkprotocol from the MAC is specified bystate->interface.state->advertising may be used, but is not required.
Older drivers (prior to the
mac_link_up()change) may usestate->speed,state->duplex andstate->pause to configure the MAC, but this isdeprecated; such drivers should be converted to usemac_link_up().Other members ofstate must be ignored.
Valid state members: interface, advertising.Deprecated state members: speed, duplex, pause.
MLO_AN_INBAND:place the link in an inband negotiation mode (such as 802.3z1000base-X or Cisco SGMII mode depending on thestate->interfacemode). In both cases, link state management (whether the linkis up or not) is performed by the MAC, and reported via the
pcs_get_state()callback. Changes in link state must be madeby callingphylink_mac_change().Interface mode specific details are mentioned below.
If in 802.3z mode, the link speed is fixed, dependent on thestate->interface. Duplex and pause modes are negotiated viathe in-band configuration word. Advertised pause modes are setaccording tostate->advertising. Beware of MACs which onlysupport full duplex at gigabit and higher speeds.
If in Cisco SGMII mode, the link speed and duplex mode are passedin the serial bitstream 16-bit configuration word, and the MACshould be configured to read these bits and acknowledge theconfiguration word. Nothing is advertised by the MAC. The MAC isresponsible for reading the configuration word and configuringitself accordingly.
Valid state members: interface, pause, advertising.
Implementations are expected to update the MAC to reflect therequested settings - i.o.w., if nothing has changed between twocalls, no action is expected. If only flow control settings havechanged, flow control should be updatedwithout taking the linkdown. This “update” behaviour is critical to avoid bouncing thelink up status.
- intmac_finish(structphylink_config*config,unsignedintmode,phy_interface_tiface)¶
finish a change to the PHY interface mode
Parameters
structphylink_config*configa pointer to a
structphylink_config.unsignedintmodeone of
MLO_AN_FIXED,MLO_AN_PHY,MLO_AN_INBAND.phy_interface_tifaceinterface mode to switch to
Description
phylink will call this if it calledmac_prepare() to allow the MAC tocomplete any necessary steps after the MAC and PCS have been configuredfor themode andiface. E.g. a MAC driver may wish to re-enable theSerdes PHY here if it was previously disabled bymac_prepare().
Returns zero on success, or negative errno on failure which will bereported to the kernel log.
- voidmac_link_down(structphylink_config*config,unsignedintmode,phy_interface_tinterface)¶
notification that the link has gone down
Parameters
structphylink_config*configa pointer to a
structphylink_config.unsignedintmodelink autonegotiation mode
phy_interface_tinterfacelink
typedefphy_interface_tmode
Description
Notifies the MAC that the link has gone down. This will not be calledunlessmac_link_up() has been previously called.
The MAC should stop processing packets for transmission and reception.phylink will have callednetif_carrier_off() to notify the networkingstack that the link has gone down, so MAC drivers should not make thiscall.
Ifmode isMLO_AN_INBAND, then this function must not prevent thelink coming up.
- voidmac_link_up(structphylink_config*config,structphy_device*phy,unsignedintmode,phy_interface_tinterface,intspeed,intduplex,booltx_pause,boolrx_pause)¶
notification that the link has come up
Parameters
structphylink_config*configa pointer to a
structphylink_config.structphy_device*phyany attached phy (deprecated - please use LPI interfaces)
unsignedintmodelink autonegotiation mode
phy_interface_tinterfacelink
typedefphy_interface_tmodeintspeedlink speed
intduplexlink duplex
booltx_pauselink transmit pause enablement status
boolrx_pauselink receive pause enablement status
Description
Notifies the MAC that the link has come up, and the parameters of the link as seen from the MAC’s point of view. If mac_link_up() has been called previously, there will be an intervening call to mac_link_down() before this method will be subsequently called.
speed,duplex,tx_pause andrx_pause indicate the finalised linksettings, and should be used to configure the MAC block appropriatelywhere these settings are not automatically conveyed from the PCS block,or if in-band negotiation (as defined by phylink_autoneg_inband(mode))is disabled.
Note that when 802.3z in-band negotiation is in use, it is possiblethat the user wishes to override the pause settings, and this shouldbe allowed when considering the implementation of this method.
Once configured, the MAC may begin to process packets for transmissionand reception.
Interface type selection must be done inmac_config().
- voidmac_disable_tx_lpi(structphylink_config*config)¶
disable LPI generation at the MAC
Parameters
structphylink_config*configa pointer to a
structphylink_config.
Description
Disable generation of LPI at the MAC, effectively preventing the MACfrom indicating that it is idle.
- intmac_enable_tx_lpi(structphylink_config*config,u32timer,booltx_clk_stop)¶
configure and enable LPI generation at the MAC
Parameters
structphylink_config*configa pointer to a
structphylink_config.u32timerLPI timeout in microseconds.
booltx_clk_stopallow xMII transmit clock to be stopped during LPI
Description
Configure the LPI timeout accordingly. This will only be called whenthe link is already up, to cater for situations where the hardwareneeds to be programmed according to the link speed.
Enable LPI generation at the MAC, and configure whether the xMII transmitclock may be stopped.
Return
0 on success. Please consult with rmk before returning an error.
- intmac_wol_set(structphylink_config*config,u32wolopts,constu8*sopass)¶
configure the Wake-on-Lan parameters
Parameters
structphylink_config*configa pointer to a
structphylink_config.u32woloptsBitmask of
WAKE_*flags for enabled Wake-On-Lan modes.constu8*sopassSecureOn(tm) password; meaningful only for
WAKE_MAGICSECURE
Description
Enable the specified Wake-on-Lan options at the MAC. Options that thePHY can handle will have been removed fromwolopts.
The presence of this method enables phylink-managed WoL support.
Return
0 on success.
- structphylink_pcs¶
PHYLINK PCS instance
Definition:
struct phylink_pcs { unsigned long supported_interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)]; const struct phylink_pcs_ops *ops; struct phylink *phylink; bool poll; bool rxc_always_on;};Members
supported_interfacesdescribing which PHY_INTERFACE_MODE_xxxare supported by this PCS.
opsa pointer to the
structphylink_pcs_opsstructurephylinkpointer to
structphylink_configpollpoll the PCS for link changes
rxc_always_onThe MAC driver requires the reference clockto always be on. Standalone PCS drivers whichdo not have access to a PHY device can checkthis instead of PHY_F_RXC_ALWAYS_ON.
Description
This structure is designed to be embedded within the PCS private data,and will be passed between phylink and the PCS.
Thephylink member is private to phylink and must not be touched bythe PCS driver.
- structphylink_pcs_ops¶
MAC PCS operations structure.
Definition:
struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs, phy_interface_t interface); int (*pcs_enable)(struct phylink_pcs *pcs); void (*pcs_disable)(struct phylink_pcs *pcs); void (*pcs_pre_config)(struct phylink_pcs *pcs, phy_interface_t interface); int (*pcs_post_config)(struct phylink_pcs *pcs, phy_interface_t interface); void (*pcs_get_state)(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac); void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); void (*pcs_disable_eee)(struct phylink_pcs *pcs); void (*pcs_enable_eee)(struct phylink_pcs *pcs); int (*pcs_pre_init)(struct phylink_pcs *pcs);};Members
pcs_validatevalidate the link configuration.
pcs_inband_capsquery inband support for interface mode.
pcs_enableenable the PCS.
pcs_disabledisable the PCS.
pcs_pre_configpre-mac_config method (for errata)
pcs_post_configpost-mac_config method (for errata)
pcs_get_stateread the current MAC PCS link state from the hardware.
pcs_configconfigure the MAC PCS for the selected mode and state.
pcs_an_restartrestart 802.3z BaseX autonegotiation.
pcs_link_upprogram the PCS for the resolved link configuration(where necessary).
pcs_disable_eeeoptional notification to PCS that EEE has been disabledat the MAC.
pcs_enable_eeeoptional notification to PCS that EEE will be enabled atthe MAC.
pcs_pre_initconfigure PCS components necessary for MAC hardwareinitialization e.g. RX clock for stmmac.
- intpcs_validate(structphylink_pcs*pcs,unsignedlong*supported,conststructphylink_link_state*state)¶
validate the link configuration.
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.unsignedlong*supportedethtool bitmask for supported link modes.
conststructphylink_link_state*statea const pointer to a
structphylink_link_state.
Description
Validate the interface mode, and advertising’s autoneg bit, removing anymedia ethtool link modes that would not be supportable from the supportedmask. Phylink will propagate the changes to the advertising mask. See thestructphylink_mac_opsvalidate() method.
Returns -EINVAL if the interface mode/autoneg mode is not supported.Returns non-zero positive if the link state can be supported.
- unsignedintpcs_inband_caps(structphylink_pcs*pcs,phy_interface_tinterface)¶
query PCS in-band capabilities for interface mode.
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.phy_interface_tinterfaceinterface mode to be queried
Description
Returns zero if it is unknown what in-band signalling is supported by thePHY (e.g. because the PHY driver doesn’t implement the method.) Otherwise,returns a bit mask of the LINK_INBAND_* values fromenumlink_inband_signalling to describe which inband modes are supportedfor this interface mode.
- intpcs_enable(structphylink_pcs*pcs)¶
enable the PCS.
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.
- voidpcs_disable(structphylink_pcs*pcs)¶
disable the PCS.
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.
- voidpcs_get_state(structphylink_pcs*pcs,unsignedintneg_mode,structphylink_link_state*state)¶
Read the current inband link state from the hardware
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.unsignedintneg_modelink negotiation mode (PHYLINK_PCS_NEG_xxx)
structphylink_link_state*statea pointer to a
structphylink_link_state.
Description
Read the current inband link state from the MAC PCS, reporting thecurrent speed instate->speed, duplex mode instate->duplex, pausemode instate->pause using theMLO_PAUSE_RX andMLO_PAUSE_TX bits,negotiation completion state instate->an_complete, and link up stateinstate->link. If possible,state->lp_advertising should also bepopulated.
Note that theneg_mode parameter is always the PHYLINK_PCS_NEG_xxxstate, not MLO_AN_xxx.
- intpcs_config(structphylink_pcs*pcs,unsignedintneg_mode,phy_interface_tinterface,constunsignedlong*advertising,boolpermit_pause_to_mac)¶
Configure the PCS mode and advertisement
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.unsignedintneg_modelink negotiation mode (see below)
phy_interface_tinterfaceinterface mode to be used
constunsignedlong*advertisingadvertisement ethtool link mode mask
boolpermit_pause_to_macpermit forwarding pause resolution to MAC
Description
Configure the PCS for the operating mode, the interface mode, and setthe advertisement mask.permit_pause_to_mac indicates whether thehardware may forward the pause mode resolution to the MAC.
When operating inMLO_AN_INBAND, inband should always be enabled,otherwise inband should be disabled.
For SGMII, there is no advertisement from the MAC side, the PCS shouldbe programmed to acknowledge the inband word from the PHY.
For 1000BASE-X, the advertisement should be programmed into the PCS.
For most 10GBASE-R, there is no advertisement.
Theneg_mode argument should be tested via the phylink_mode_*() family offunctions, or for PCS that set pcs->neg_mode true, should be testedagainst the PHYLINK_PCS_NEG_* definitions.
pcs_config() will be called when configuration of the PCS is requiredor when the advertisement is possibly updated. It must not unnecessarilydisrupt an established link.
When an autonegotiation restart is required for 802.3z modes, .pcs_config()should return a positive non-zero integer (e.g. 1) to indicate to phylinkto call thepcs_an_restart() method.
- voidpcs_an_restart(structphylink_pcs*pcs)¶
restart 802.3z BaseX autonegotiation
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.
Description
When PCS ops are present, this overridesmac_an_restart() instructphylink_mac_ops.
- voidpcs_link_up(structphylink_pcs*pcs,unsignedintneg_mode,phy_interface_tinterface,intspeed,intduplex)¶
program the PCS for the resolved link configuration
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.unsignedintneg_modelink negotiation mode (see below)
phy_interface_tinterfacelink
typedefphy_interface_tmodeintspeedlink speed
intduplexlink duplex
Description
This call will be made just beforemac_link_up() to inform the PCS ofthe resolved link parameters. For example, a PCS operating in SGMIImode without in-band AN needs to be manually configured for the linkand duplex setting. Otherwise, this should be a no-op.
The neg_mode argument should be tested via the phylink_mode_*() family of functions, or for PCS that set pcs->neg_mode true, should be tested against the PHYLINK_PCS_NEG_* definitions.
- voidpcs_disable_eee(structphylink_pcs*pcs)¶
Disable EEE at the PCS
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs
Description
Optional method informing the PCS that EEE has been disabled at the MAC.
- voidpcs_enable_eee(structphylink_pcs*pcs)¶
Enable EEE at the PCS
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs
Description
Optional method informing the PCS that EEE is about to be enabled at the MAC.
- intpcs_pre_init(structphylink_pcs*pcs)¶
Configure PCS components necessary for MAC initialization
Parameters
structphylink_pcs*pcsa pointer to a
structphylink_pcs.
Description
This function can be called by MAC drivers through thephylink_pcs_pre_init() wrapper, before their hardware is initialized. Itshould not be called after the link is brought up, as reconfiguring the PCSat this point could break the link.
Some MAC devices require specific hardware initialization to be performed bytheir associated PCS device before they can properly initialize their ownhardware. An example of this is the initialization of stmmac controllers,which requires an active REF_CLK signal to be provided by the PHY/PCS.
By callingphylink_pcs_pre_init(), MAC drivers can ensure that the PCS issetup in a way that allows for successful hardware initialization.
The specific configuration performed bypcs_pre_init() is dependent on themodel of PCS and the requirements of the MAC device attached to it. PCSdriver authors should consider whether their target device is to be used inconjunction with a MAC device whose driver callsphylink_pcs_pre_init(). MACdriver authors should document their requirements for the PCSpre-initialization.
- intphylink_get_link_timer_ns(phy_interface_tinterface)¶
return the PCS link timer value
Parameters
phy_interface_tinterfacelink
typedefphy_interface_tmode
Description
Return the PCS link timer setting in nanoseconds for the PHYinterfacemode, or -EINVAL if not appropriate.
- boolphylink_mac_implements_lpi(conststructphylink_mac_ops*ops)¶
determine if MAC implements LPI ops
Parameters
conststructphylink_mac_ops*opsphylink_mac_ops structure
Description
Returns true if the phylink MAC operations structure indicates that theLPI operations have been implemented, false otherwise.
- structphylink¶
internal data type for phylink
Definition:
struct phylink {};Members
- voidphylink_set_port_modes(unsignedlong*mask)¶
set the port type modes in the ethtool mask
Parameters
unsignedlong*maskethtool link mode mask
Description
Sets all the port type modes in the ethtool mask. MAC drivers shoulduse this in their ‘validate’ callback.
- intphylink_interface_max_speed(phy_interface_tinterface)¶
get the maximum speed of a phy interface
Parameters
phy_interface_tinterfacephy interface mode defined by
typedefphy_interface_t
Description
Determine the maximum speed of a phy interface. This is intended to helpdetermine the correct speed to pass to the MAC when the phy is performingrate matching.
Return
The maximum speed ofinterface
- unsignedlongphylink_caps_to_link_caps(unsignedlongcaps)¶
Convert a set of MAC capabilities to LINK caps
Parameters
unsignedlongcapsA set of MAC capabilities
Return
The corresponding set of LINK_CAPA as defined in phy-caps.h
- voidphylink_caps_to_linkmodes(unsignedlong*linkmodes,unsignedlongcaps)¶
Convert capabilities to ethtool link modes
Parameters
unsignedlong*linkmodesethtool linkmode mask (must be already initialised)
unsignedlongcapsbitmask of MAC capabilities
Description
Set all possible pause, speed and duplex linkmodes inlinkmodes that aresupported by thecaps.linkmodes must have been initialised previously.
- voidphylink_limit_mac_speed(structphylink_config*config,u32max_speed)¶
limit the phylink_config to a maximum speed
Parameters
structphylink_config*configpointer to a
structphylink_configu32max_speedmaximum speed
Description
Mask off MAC capabilities for speeds higher than the max_speed parameter. Any further modifications of config.mac_capabilities will override this.
- unsignedlongphylink_cap_from_speed_duplex(intspeed,unsignedintduplex)¶
Get mac capability from speed/duplex
Parameters
intspeedthe speed to search for
unsignedintduplexthe duplex to search for
Description
Find the mac capability for a given speed and duplex.
Return
A mask with the mac capability matching speed and duplex, or 0 if there were no matches.
- unsignedlongphylink_get_capabilities(phy_interface_tinterface,unsignedlongmac_capabilities,intrate_matching)¶
get capabilities for a given MAC
Parameters
phy_interface_tinterfacephy interface mode defined by
typedefphy_interface_tunsignedlongmac_capabilitiesbitmask of MAC capabilities
intrate_matchingtype of rate matching being performed
Description
Get the MAC capabilities that are supported by theinterface mode andmac_capabilities.
- voidphylink_validate_mask_caps(unsignedlong*supported,structphylink_link_state*state,unsignedlongmac_capabilities)¶
Restrict link modes based on caps
Parameters
unsignedlong*supportedethtool bitmask for supported link modes.
structphylink_link_state*statepointer to a
structphylink_link_state.unsignedlongmac_capabilitiesbitmask of MAC capabilities
Description
Calculate the supported link modes based on mac_capabilities, and restrict supported and state based on that. Use this function if your capabilities aren’t constant, such as if they vary depending on the interface.
- voidphylink_pcs_neg_mode(structphylink*pl,structphylink_pcs*pcs,phy_interface_tinterface,constunsignedlong*advertising)¶
helper to determine PCS inband mode
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structphylink_pcs*pcsa pointer to
structphylink_pcsphy_interface_tinterfaceinterface mode to be used
constunsignedlong*advertisingadvertisement ethtool link mode mask
Description
Determines the negotiation mode to be used by the PCS, and returnsone of:
- PHYLINK_PCS_NEG_NONE: interface mode does not support inband
- PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY) will be used.
- PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg disabled
- PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled
Note
this is for cases where the PCS itself is involved in negotiation(e.g. Clause 37, SGMII and similar) not Clause 73.
- intphylink_set_fixed_link(structphylink*pl,conststructphylink_link_state*state)¶
set the fixed link
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()conststructphylink_link_state*statea pointer to a
structphylink_link_state.
Description
This function is used when the link parameters are known and do not change,making it suitable for certain types of network connections.
Return
zero on success or negative error code.
- structphylink*phylink_create(structphylink_config*config,conststructfwnode_handle*fwnode,phy_interface_tiface,conststructphylink_mac_ops*mac_ops)¶
create a phylink instance
Parameters
structphylink_config*configa pointer to the target
structphylink_configconststructfwnode_handle*fwnodea pointer to a
structfwnode_handledescribing the networkinterfacephy_interface_tifacethe desired link mode defined by
typedefphy_interface_tconststructphylink_mac_ops*mac_opsa pointer to a
structphylink_mac_opsfor the MAC.
Description
Create a new phylink instance, and parse the link parameters found in fwnode. This will parse in-band modes, fixed-link or SFP configuration.
Note
the rtnl lock must not be held when calling this function.
Returns a pointer to a struct phylink, or an error-pointer value. Users must use IS_ERR() to check for errors from this function.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Destroy a phylink instance. Any PHY that has been attached must have beencleaned up viaphylink_disconnect_phy() prior to calling this function.
Note
the rtnl lock must not be held when calling this function.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X,no PHY is needed.
Returns true if phylink will be expecting a PHY.
- intphylink_connect_phy(structphylink*pl,structphy_device*phy)¶
connect a PHY to the phylink instance
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structphy_device*phya pointer to a
structphy_device.
Description
Connectphy to the phylink instance specified bypl by callingphy_attach_direct(). Configure thephy according to the MAC driver’scapabilities, start the PHYLIB state machine and enable any interruptsthat the PHY supports.
This updates the phylink’s ethtool supported and advertising link modemasks.
Returns 0 on success or a negative errno.
- intphylink_of_phy_connect(structphylink*pl,structdevice_node*dn,u32flags)¶
connect the PHY specified in the DT mode.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structdevice_node*dna pointer to a
structdevice_node.u32flagsPHY-specific flags to communicate to the PHY device driver
Description
Connect the phy specified in the device nodedn to the phylink instancespecified bypl. Actions specified inphylink_connect_phy() will beperformed.
Returns 0 on success or a negative errno.
- intphylink_fwnode_phy_connect(structphylink*pl,conststructfwnode_handle*fwnode,u32flags)¶
connect the PHY specified in the fwnode.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()conststructfwnode_handle*fwnodea pointer to a
structfwnode_handle.u32flagsPHY-specific flags to communicate to the PHY device driver
Description
Connect the phy specifiedfwnode to the phylink instance specifiedbypl.
Returns 0 on success or a negative errno.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Disconnect any current PHY from the phylink instance described bypl.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()boolupindicates whether the link is currently up.
Description
The MAC driver should call this function when the state of its link changes (e.g. link failure, new negotiation results, etc.)
- voidphylink_pcs_change(structphylink_pcs*pcs,boolup)¶
notify phylink of a change to PCS link state
Parameters
structphylink_pcs*pcspointer to
structphylink_pcsboolupindicates whether the link is currently up.
Description
The PCS driver should call this when the state of its link changes (e.g. link failure, new negotiation results, etc.) Note: it should not determine “up” by reading the BMSR. If in doubt about the link state at interrupt time, then pass true if pcs_get_state() returns the latched link-down state, otherwise pass false.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Start the phylink instance specified bypl, configuring the MAC for thedesired link mode(s) and negotiation style. This should be called from thenetwork device driver’sstructnet_device_opsndo_open() method.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Stop the phylink instance specified bypl. This should be called from thenetwork device driver’sstructnet_device_opsndo_stop() method. Thenetwork device’s carrier state should not be changed prior to calling thisfunction.
This will synchronously bring down the link if the link is not alreadydown (in other words, it will trigger amac_link_down() method call.)
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Disable the PHY’s ability to stop the receive clock while the receive pathis in EEE LPI state, until the number of calls tophylink_rx_clk_stop_block()are balanced by calls tophylink_rx_clk_stop_unblock().
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
All calls to phylink_rx_clk_stop_block() must be balanced with a corresponding call to phylink_rx_clk_stop_unblock() to restore the PHY’s ability to stop the receive clock when the receive path is in EEE LPI mode.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()boolmac_woltrue if the MAC needs to receive packets for Wake-on-Lan
Description
Handle a network device suspend event. There are several cases:
- If Wake-on-Lan is not active, we can bring down the link between the MAC and PHY by calling phylink_stop().
- If Wake-on-Lan is active, and being handled only by the PHY, we can also bring down the link between the MAC and PHY.
- If Wake-on-Lan is active, but being handled by the MAC, the MAC still needs to receive packets, so we can not bring the link down.
Note
when phylink managed Wake-on-Lan is in use, mac_wol is ignored. (struct phylink_mac_ops.mac_set_wol populated.)
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Optional, but if called must be called prior tophylink_resume().
Prepare to resume a network device, preparing the PHY as necessary.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Undo the effects ofphylink_suspend(), returning the link to anoperational state.
- voidphylink_ethtool_get_wol(structphylink*pl,structethtool_wolinfo*wol)¶
get the wake on lan parameters for the PHY
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_wolinfo*wola pointer to
structethtool_wolinfoto hold the read parameters
Description
Read the wake on lan parameters from the PHY attached to the phylinkinstance specified bypl. If no PHY is currently attached, report nosupport for wake on lan.
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_wolinfo*wola pointer to
structethtool_wolinfofor the desired parameters
Description
Set the wake on lan parameters for the PHY attached to the phylink instance specified by pl. If no PHY is attached, returns EOPNOTSUPP error.
Returns zero on success or negative errno code.
- intphylink_ethtool_ksettings_get(structphylink*pl,structethtool_link_ksettings*kset)¶
get the current link settings
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_link_ksettings*kseta pointer to a
structethtool_link_ksettingsto hold link settings
Description
Read the current link settings for the phylink instance specified bypl.This will be the link settings read from the MAC, PHY or fixed linksettings depending on the current negotiation mode.
- intphylink_ethtool_ksettings_set(structphylink*pl,conststructethtool_link_ksettings*kset)¶
set the link settings
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()conststructethtool_link_ksettings*kseta pointer to a
structethtool_link_ksettingsfor the desired modes
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
Restart negotiation for the phylink instance specified bypl. This willcause any attached phy to restart negotiation with the link partner, andif the MAC is in a BaseX mode, the MAC will also be requested to restartnegotiation.
Returns zero on success, or negative error code.
- voidphylink_ethtool_get_pauseparam(structphylink*pl,structethtool_pauseparam*pause)¶
get the current pause parameters
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_pauseparam*pausea pointer to a
structethtool_pauseparam
- intphylink_ethtool_set_pauseparam(structphylink*pl,structethtool_pauseparam*pause)¶
set the current pause parameters
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_pauseparam*pausea pointer to a
structethtool_pauseparam
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create().
Description
Read the Energy Efficient Ethernet error counter from the PHY associatedwith the phylink instance specified bypl.
Returns positive error counter value, or negative error code.
- intphylink_ethtool_get_eee(structphylink*pl,structethtool_keee*eee)¶
read the energy efficient ethernet parameters
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_keee*eeea pointer to a
structethtool_keeefor the read parameters
- intphylink_ethtool_set_eee(structphylink*pl,structethtool_keee*eee)¶
set the energy efficient ethernet parameters
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structethtool_keee*eeea pointer to a
structethtool_keeefor the desired parameters
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()structifreq*ifra pointer to a
structifreqfor socket ioctlsintcmdioctl cmd to execute
Description
Perform the specified MII ioctl on the PHY attached to the phylink instancespecified bypl. If no PHY is attached, emulate the presence of the PHY.
SIOCGMIIPHY:read register from the current PHY.
SIOCGMIIREG:read register from the specified PHY.
SIOCSMIIREG:set a register on the specified PHY.
Return
zero on success or negative error code.
- intphylink_speed_down(structphylink*pl,boolsync)¶
set the non-SFP PHY to lowest speed supported by both link partners
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()boolsyncperform action synchronously
Description
If we have a PHY that is not part of a SFP module, then set the speedas described in thephy_speed_down() function. Please see this functionfor a description of thesync parameter.
Returns zero if there is no PHY, otherwise as perphy_speed_down().
- intphylink_speed_up(structphylink*pl)¶
restore the advertised speeds prior to the call to
phylink_speed_down()
Parameters
structphylink*pla pointer to a
structphylinkreturned fromphylink_create()
Description
If we have a PHY that is not part of a SFP module, then restore thePHY speeds as perphy_speed_up().
Returns zero if there is no PHY, otherwise as perphy_speed_up().
- voidphylink_decode_usxgmii_word(structphylink_link_state*state,uint16_tlpa)¶
decode the USXGMII word from a MAC PCS
Parameters
structphylink_link_state*statea pointer to a
structphylink_link_state.uint16_tlpaa 16 bit value which stores the USXGMII auto-negotiation word
Description
Helper for MAC PCS supporting the USXGMII protocol and the auto-negotiationcode word. Decode the USXGMII code word and populate the corresponding fields(speed, duplex) into the phylink_link_state structure.
- voidphylink_decode_usgmii_word(structphylink_link_state*state,uint16_tlpa)¶
decode the USGMII word from a MAC PCS
Parameters
structphylink_link_state*statea pointer to a
structphylink_link_state.uint16_tlpaa 16 bit value which stores the USGMII auto-negotiation word
Description
Helper for MAC PCS supporting the USGMII protocol and the auto-negotiationcode word. Decode the USGMII code word and populate the corresponding fields(speed, duplex) into the phylink_link_state structure. The structure for thisword is the same as the USXGMII word, except it only supports speeds up to1Gbps.
- voidphylink_mii_c22_pcs_decode_state(structphylink_link_state*state,unsignedintneg_mode,u16bmsr,u16lpa)¶
Decode MAC PCS state from MII registers
Parameters
structphylink_link_state*statea pointer to a
structphylink_link_state.unsignedintneg_modelink negotiation mode (PHYLINK_PCS_NEG_xxx)
u16bmsrThe value of the
MII_BMSRregisteru16lpaThe value of the
MII_LPAregister
Description
Helper for MAC PCS supporting the 802.3 clause 22 register set forclause 37 negotiation and/or SGMII control.
Parse the Clause 37 or Cisco SGMII link partner negotiation word intothe phylinkstate structure. This is suitable to be used for implementingthepcs_get_state() member of thestructphylink_pcs_ops structure ifaccessingbmsr andlpa cannot be done with MDIO directly.
- voidphylink_mii_c22_pcs_get_state(structmdio_device*pcs,unsignedintneg_mode,structphylink_link_state*state)¶
read the MAC PCS state
Parameters
structmdio_device*pcsa pointer to a
structmdio_device.unsignedintneg_modelink negotiation mode (PHYLINK_PCS_NEG_xxx)
structphylink_link_state*statea pointer to a
structphylink_link_state.
Description
Helper for MAC PCS supporting the 802.3 clause 22 register set forclause 37 negotiation and/or SGMII control.
Read the MAC PCS state from the MII device pcs and parse the Clause 37 or Cisco SGMII link partner negotiation word into the phylink state structure. This is suitable to be directly plugged into the pcs_get_state() member of the struct phylink_pcs_ops structure.
- intphylink_mii_c22_pcs_encode_advertisement(phy_interface_tinterface,constunsignedlong*advertising)¶
configure the clause 37 PCS advertisement
Parameters
phy_interface_tinterfacethe PHY interface mode being configured
constunsignedlong*advertisingthe ethtool advertisement mask
Description
Helper for MAC PCS supporting the 802.3 clause 22 register set forclause 37 negotiation and/or SGMII control.
Encode the clause 37 PCS advertisement as specified byinterface andadvertising.
Return
The new value foradv, or-EINVAL if it should not be changed.
- intphylink_mii_c22_pcs_config(structmdio_device*pcs,phy_interface_tinterface,constunsignedlong*advertising,unsignedintneg_mode)¶
configure clause 22 PCS
Parameters
structmdio_device*pcsa pointer to a
structmdio_device.phy_interface_tinterfacethe PHY interface mode being configured
constunsignedlong*advertisingthe ethtool advertisement mask
unsignedintneg_modePCS negotiation mode
Description
Configure a Clause 22 PCS PHY with the appropriate negotiation parameters for the neg_mode, interface and advertising parameters. Returns negative error number on failure, zero if the advertisement has not changed, or positive if there is a change.
- voidphylink_mii_c22_pcs_an_restart(structmdio_device*pcs)¶
restart 802.3z autonegotiation
Parameters
structmdio_device*pcsa pointer to a
structmdio_device.
Description
Helper for MAC PCS supporting the 802.3 clause 22 register set forclause 37 negotiation.
Restart the clause 37 negotiation with the link partner. This is suitable to be directly plugged into the pcs_an_restart() member of the struct phylink_pcs_ops structure.
SFP support¶
- structsfp_bus¶
internal representation of a sfp bus
Definition:
struct sfp_bus {};Members
- structsfp_eeprom_id¶
raw SFP module identification information
Definition:
struct sfp_eeprom_id { struct sfp_eeprom_base base; struct sfp_eeprom_ext ext;};Members
basebase SFP module identification structure
extextended SFP module identification structure
Description
See the SFF-8472 specification and related documents for the definitionof these structure members. This can be obtained fromhttps://www.snia.org/technology-communities/sff/specifications
- structsfp_module_caps¶
sfp module capabilities
Definition:
struct sfp_module_caps { unsigned long interfaces[BITS_TO_LONGS(PHY_INTERFACE_MODE_MAX)]; unsigned long link_modes[BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS)]; bool may_have_phy; u8 port;};Members
interfacesbitmap of interfaces that the module may support
link_modesbitmap of ethtool link modes that the module may support
may_have_phyindicate whether the module may have an ethernet PHY. There is no way to be sure that a module has a PHY as the EEPROM doesn’t contain this information. When set, this does not mean that the module definitely has a PHY.
portone of ethtool
PORT_* definitions, parsed from the module EEPROM, or PORT_OTHER if the port type is not known.
- structsfp_upstream_ops¶
upstream operations structure
Definition:
struct sfp_upstream_ops { void (*attach)(void *priv, struct sfp_bus *bus); void (*detach)(void *priv, struct sfp_bus *bus); int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); void (*module_remove)(void *priv); int (*module_start)(void *priv); void (*module_stop)(void *priv); void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); void (*disconnect_phy)(void *priv, struct phy_device *);};Members
attachcalled when the sfp socket driver is bound to the upstream(mandatory).
detachcalled when the sfp socket driver is unbound from the upstream(mandatory).
module_insertcalled after a module has been detected to determinewhether the module is supported for the upstream device.
module_removecalled after the module has been removed.
module_startcalled after the PHY probe step
module_stopcalled before the PHY is removed
link_downcalled when the link is non-operational for whateverreason.
link_upcalled when the link is operational.
connect_phycalled when an I2C accessible PHY has been detectedon the module.
disconnect_phycalled when a module with an I2C accessible PHY hasbeen removed.
- phy_interface_tsfp_select_interface(structsfp_bus*bus,constunsignedlong*link_modes)¶
Select appropriate phy_interface_t mode
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp moduleconstunsignedlong*link_modesethtool link modes mask
Description
Derive the phy_interface_t mode for the SFP module from the linkmodes mask.
- voidsfp_bus_put(structsfp_bus*bus)¶
put a reference on the
structsfp_bus
Parameters
structsfp_bus*busthe
structsfp_busfound viasfp_bus_find_fwnode()
Description
Put a reference on thestructsfp_bus and free the underlying structureif this was the last reference.
- intsfp_get_module_info(structsfp_bus*bus,structethtool_modinfo*modinfo)¶
Get the ethtool_modinfo for a SFP module
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp modulestructethtool_modinfo*modinfoa
structethtool_modinfo
Description
Fill in the type and eeprom_len parameters inmodinfo for a module onthe sfp bus specified bybus.
Returns 0 on success or a negative errno number.
- intsfp_get_module_eeprom(structsfp_bus*bus,structethtool_eeprom*ee,u8*data)¶
Read the SFP module EEPROM
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp modulestructethtool_eeprom*eea
structethtool_eepromu8*databuffer to contain the EEPROM data (must be at leastee->len bytes)
Description
Read the EEPROM as specified by the suppliedee. See the documentationforstructethtool_eeprom for the region to be read.
Returns 0 on success or a negative errno number.
- intsfp_get_module_eeprom_by_page(structsfp_bus*bus,conststructethtool_module_eeprom*page,structnetlink_ext_ack*extack)¶
Read a page from the SFP module EEPROM
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp moduleconststructethtool_module_eeprom*pagea
structethtool_module_eepromstructnetlink_ext_ack*extackextack for reporting problems
Description
Read an EEPROM page as specified by the suppliedpage. See thedocumentation forstructethtool_module_eeprom for the page to be read.
Returns 0 on success or a negative errno number. More errorinformation might be provided via extack
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp module
Description
Inform the SFP socket that the network device is now up, so that themodule can be enabled by allowing TX_DISABLE to be deasserted. Thisshould be called from the network device driver’sstructnet_device_opsndo_open() method.
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp module
Description
Inform the SFP socket that the network device is now down, so that the module can be disabled by asserting TX_DISABLE, disabling the laser in optical modules. This should be called from the network device driver’s struct net_device_ops ndo_stop() method.
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp moduleunsignedintrate_kbdsignalling rate in units of 1000 baud
Description
Configure the rate select settings on the SFP module for the signallingrate (not the same as the data rate).
- Locks that may be held:
Phylink’s state_mutex
rtnl lock
SFP’s sm_mutex
- structsfp_bus*sfp_bus_find_fwnode(conststructfwnode_handle*fwnode)¶
parse and locate the SFP bus from fwnode
Parameters
conststructfwnode_handle*fwnodefirmware node for the parent device (MAC or PHY)
Description
Parse the parent device’s firmware node for a SFP bus, and locatethe sfp_bus structure, incrementing its reference count. This mustbe put viasfp_bus_put() when done.
Return
- on success, a pointer to the sfp_bus structure,
- NULL if no SFP is specified,
- on failure, an error pointer value:
  - corresponding to the errors detailed for fwnode_property_get_reference_args().
  - -ENOMEM if we failed to allocate the bus.
  - an error from the upstream’s connect_phy() method.
- intsfp_bus_add_upstream(structsfp_bus*bus,void*upstream,conststructsfp_upstream_ops*ops)¶
parse and register the neighbouring device
Parameters
structsfp_bus*busthe
structsfp_busfound viasfp_bus_find_fwnode()void*upstreamthe upstream private data
conststructsfp_upstream_ops*opsthe upstream’s
structsfp_upstream_ops
Description
Add upstream driver for the SFP bus, and if the bus is complete, registerthe SFP bus usingsfp_register_upstream(). This takes a reference on thebus, so it is safe to put the bus after this call.
Return
- on success, a pointer to the sfp_bus structure,
- NULL if no SFP is specified,
- on failure, an error pointer value:
  - corresponding to the errors detailed for fwnode_property_get_reference_args().
  - -ENOMEM if we failed to allocate the bus.
  - an error from the upstream’s connect_phy() method.
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp module
Description
Delete a previously registered upstream connection for the SFPmodule.bus should have been added bysfp_bus_add_upstream().
Parameters
structsfp_bus*busa pointer to the
structsfp_busstructure for the sfp module
Description
Gets the SFP device’s name, ifbus has a registered socket. Callers musthold RTNL, and the returned name is only valid until RTNL is released.
Return
The name of the SFP device registered with
sfp_register_socket()NULLif no device was registered onbus