0037001 /*
0037002 generic/ip_eth.c
0037003
0037004 Ethernet specific part of the IP implementation
0037005
0037006 Created: Apr 22, 1993 by Philip Homburg
0037007
0037008 Copyright 1995 Philip Homburg
0037009 */
ip_eth.c contains the code that handles the passing of packets and configuration data between the ip layer and the ethernet layer. The first two functions in ip_eth.c, ipeth_init() and ipeth_main(), are initialization functions that are called during the initialization of the network service. The remainder of the functions in ip_eth.c move packets between the ip layer and the ethernet layer.
0037010
0037011 #include "inet.h"
0037012 #include "type.h"
0037013 #include "arp.h"
0037014 #include "assert.h"
0037015 #include "buf.h"
0037016 #include "clock.h"
0037017 #include "eth.h"
0037018 #include "event.h"
0037019 #include "ip.h"
0037020 #include "ip_int.h"
0037021
0037022 THIS_FILE
0037023
0037024 typedef struct xmit_hdr
0037025 {
0037026 time_t xh_time;
0037027 ipaddr_t xh_ipaddr;
0037028 } xmit_hdr_t;
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037029
0037030 PRIVATE ether_addr_t broadcast_ethaddr= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
0037031 PRIVATE ipaddr_t broadcast_ipaddr= 0xFFFFFFFFL;
0037032
0037033 FORWARD void do_eth_read ARGS(( ip_port_t *port ));
0037034 FORWARD acc_t *get_eth_data ARGS(( int fd, size_t offset,
0037035 size_t count, int for_ioctl ));
0037036 FORWARD int put_eth_data ARGS(( int fd, size_t offset,
0037037 acc_t *data, int for_ioctl ));
0037038 FORWARD void ipeth_main ARGS(( ip_port_t *port ));
0037039 FORWARD void ipeth_set_ipaddr ARGS(( ip_port_t *port ));
0037040 FORWARD void ipeth_restart_send ARGS(( ip_port_t *ip_port ));
0037041 FORWARD int ipeth_send ARGS(( struct ip_port *ip_port, ipaddr_t dest,
0037042 acc_t *pack, int broadcast ));
0037043 FORWARD void ipeth_arp_reply ARGS(( int ip_port_nr, ipaddr_t ipaddr,
0037044 ether_addr_t *dst_ether_ptr ));
0037045 FORWARD int ipeth_update_ttl ARGS(( time_t enq_time, time_t now,
0037046 acc_t *eth_pack ));
0037047 FORWARD void ip_eth_arrived ARGS(( int port, acc_t *pack,
0037048 size_t pack_size ));
0037049
0037050
0037051 PUBLIC int ipeth_init(ip_port)
ipeth_init()
If an ip port's underlying data-link layer is ethernet, ipeth_init() is called by ip_init() during the ip port's initialization. ipeth_init() calls eth_open() to acquire an ethernet file descriptor and then initializes several ethernet-dependent fields of the ip port (e.g., ip_dl.dl_eth.de_flags).
0037052 ip_port_t *ip_port;
0037053 {
0037054 assert(BUF_S >= sizeof(xmit_hdr_t));
0037055 assert(BUF_S >= sizeof(eth_hdr_t));
0037056
0037057 ip_port->ip_dl.dl_eth.de_fd= eth_open(ip_port->
0037058 ip_dl.dl_eth.de_port, ip_port->ip_port,
0037059 get_eth_data, put_eth_data, ip_eth_arrived);
eth_open()
eth_open(port, srfd, get_userdata, put_userdata, put_pkt) finds an ethernet file descriptor that is free and associates the file descriptor with an ethernet port whose index within eth_port_table[] is port, eth_open()'s first parameter.
eth_open() is called by the ip code and the arp code, and is also called if an ethernet device file (e.g., /dev/eth) is opened directly.
Here are the relationships between various file descriptors and ports:
0037060 if (ip_port->ip_dl.dl_eth.de_fd < 0)
0037061 {
0037062 DBLOCK(1, printf("ip.c: unable to open eth port\n"));
0037063 return -1;
0037064 }
To understand ipeth_init(), it is necessary to understand ip ports.
ip_port / ip_port_table[]
For every interface listed in inet.conf, there is a single ip port. For example, for the following inet.conf file:
eth0 DP8390 0 { default; };
psip1;
there will be an ip port associated with the ethernet interface and an ip port associated with the psip interface. Each of these ip ports is a struct ip_port (see below) and each ip_port struct is in ip_port_table[]. So, for the example inet.conf file above, ip_port_table[] will have 2 elements; ip_port_table[0] will be for the ethernet interface and ip_port_table[1] will be for the psip interface.
Each element in ip_port_table[] is associated with several ip file descriptors. For example, the udp code (during initialization) will open up an ip file descriptor and this ip file descriptor will be associated with one of the elements in ip_port_table[].
typedef struct ip_port
{
int ip_flags, ip_dl_type;
int ip_port;
union
{
struct
{
int de_state;
int de_flags;
int de_port;
int de_fd;
acc_t *de_frame;
acc_t *de_q_head;
acc_t *de_q_tail;
acc_t *de_arp_head;
acc_t *de_arp_tail;
} dl_eth;
struct
{
int ps_port;
acc_t *ps_send_head;
acc_t *ps_send_tail;
} dl_ps;
} ip_dl;
ipaddr_t ip_ipaddr;
ipaddr_t ip_netmask;
ipaddr_t ip_subnetmask;
u16_t ip_frame_id;
u16_t ip_mss;
ip_dev_t ip_dev_main;
ip_dev_t ip_dev_set_ipaddr;
ip_dev_send_t ip_dev_send;
acc_t *ip_loopb_head;
acc_t *ip_loopb_tail;
event_t ip_loopb_event;
struct ip_fd *ip_proto_any;
struct ip_fd *ip_proto[IP_PROTO_HASH_NR];
} ip_port_t;
int ip_flags:
The possible ip_flags are #define'd in ip_int.h:
#define IPF_EMPTY 0x0
#define IPF_CONFIGURED 0x1
#define IPF_IPADDRSET 0x2
#define IPF_NETMASKSET 0x4
After the initialization of the ip port, ip_flags is set to IPF_CONFIGURED. If the "ifconfig -h host-IP-address" command is issued, ip_ioctl() sets the IPF_IPADDRSET flag before setting the ip address and (optionally) the subnet mask.
int ip_dl_type:
"dl" stands for "data link" (layer). ip_dl_type is set to the corresponding data link layer type of the port. These types include NETTYPE_ETH (ethernet) and NETTYPE_PSIP (psip).
int ip_port:
The port number of the ip device. For example, for a system with the following /etc/inet.conf file:
eth0 DP8390 0 { default; };
psip1;
there will be 2 ports: port 0 for the ethernet device and port 1 for the psip device.
Note that this port will not necessarily be the same as dl_eth.de_port (see below).
struct dl_eth: The dl_eth struct is used (instead of dl_ps) if the underlying data link layer device of this port is an ethernet device.
int de_state:
The possible de_state values are #define'd in ip_int.h:
#define IES_EMPTY 0x0
#define IES_SETPROTO 0x1
#define IES_GETIPADDR 0x2
#define IES_MAIN 0x3
#define IES_ERROR 0x4
When the ip port is being initialized, de_state changes in quick succession from IES_EMPTY to IES_SETPROTO to IES_GETIPADDR before entering IES_MAIN, which is its normal operational state.
int de_flags:
de_flags is initialized to IEF_EMPTY. Note that "SP" stands for "SusPend".
#define IEF_EMPTY 0x1
#define IEF_SUSPEND 0x8
#define IEF_READ_IP 0x10
#define IEF_READ_SP 0x20
#define IEF_WRITE_SP 0x80
int de_port:
The ethernet port number. For example, if there were two ethernet devices, one ethernet device would have port 0 and the other would have port 1, regardless of how many psip devices were on the system.
This value is initialized in ip_init(). Also see the initial comments in ip_config.c for a description of ip_conf[].
Note that this port will not necessarily be the same as ip_port (see above).
int de_fd:
Initialized by calling eth_open(), de_fd is the ip port's associated ethernet file descriptor.
acc_t *de_frame:
acc_t *de_q_head:
acc_t *de_q_tail:
The queueing for ethernet packets being sent out by the ethernet task is somewhat convoluted. If no ethernet packets are waiting to be sent out by the ethernet task (driver), eth_write_port() stores an ethernet packet in an ethernet port's etp_wr_pack field until the packet is sent off by the ethernet task. After the ethernet task successfully sends the packet off, this field is set to NULL (either by eth_write_port() or write_int()). If the ethernet task cannot immediately send the ethernet packet off, the packet remains in etp_wr_pack. If another packet arrives for the ip port to send off to the ethernet port, the ip port encapsulates the ip packet with an ethernet header and the resulting ethernet packet is placed in the dl_eth.de_frame field of the ip port. If the ip port has additional packets that it wishes to send out, the packets are placed in the dl_eth.de_q_head/dl_eth.de_q_tail queue until the ethernet packets in etp_wr_pack and dl_eth.de_frame are sent out.
It's important to note that neither etp_wr_pack nor dl_eth.de_frame is a linked list (i.e., a queue). Each holds only a single ethernet packet.
acc_t *de_arp_head:
acc_t *de_arp_tail:
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
struct dl_ps:
int ps_port:
acc_t *ps_send_head:
acc_t *ps_send_tail:
The dl_ps struct is used (instead of dl_eth) if the underlying data link layer device is a psip device. Coverage of psip is not included in this documentation.
ipaddr_t ip_ipaddr:
ipaddr_t ip_netmask:
ipaddr_t ip_subnetmask:
The ip address of a port can be set in two ways, either with RARP or through the "ifconfig -h host-IP-address" command. If set by the ifconfig command, a message requesting an NWIOSIPCONF (Set IP CONFiguration) is sent to the appropriate ip device (e.g., /dev/ip00), causing ip_ioctl() to be called. The user then passes in an nwio_ipconf struct which contains either the ip address or the subnet mask or both.
The netmask is simply a reflection of the class to which the ip address belongs. For example, if the ip address is 194.77.33.5, then it is a class C address and its netmask is therefore 255.255.255.0. See ip_nettype() for more information.
u16_t ip_frame_id:
ip_frame_id is initialized to the time at which the ip port was configured and incremented each time a packet is sent out. The ih_id field of each packet's ip header is set to ip_frame_id. If a packet is fragmented, every fragment carries the same ih_id, so the receiver can tell which fragments belong together and properly reassemble them.
u16_t ip_mss:
If the ip_port_table[] element has an underlying ethernet layer, ip_mss is initialized to ETH_MAX_PACK_SIZE-ETH_HDR_SIZE (1514-14=1500), the size of the payload of an ethernet packet in bytes. If the size of the resulting ip packet is too large, the code fragments the packet.
ip_dev_t ip_dev_main:
ip_dev_main is initialized to ipeth_main() for ethernet devices. This function is called in ip_init().
ip_dev_t ip_dev_set_ipaddr:
ip_dev_set_ipaddr is initialized to ipeth_set_ipaddr() for ethernet devices.
ip_dev_send_t ip_dev_send:
ip_dev_send is initialized to ipeth_send() for ethernet devices or ipps_send() for psip devices.
acc_t *ip_loopb_head:
acc_t *ip_loopb_tail:
event_t ip_loopb_event:
Ip packets destined for the loopback address (127.0.0.1) or destined for the ip address of the ip port itself are placed in the ip_loopb_head/ip_loopb_tail queue before being delivered back to the ip port.
ip_loopb_event is an event that has been placed in the system-wide event queue.
struct ip_fd *ip_proto_any:
struct ip_fd *ip_proto[IP_PROTO_HASH_NR]:
For a description of ip_proto_any and ip_proto[], click here.
0037065 ip_port->ip_dl.dl_eth.de_state= IES_EMPTY;
0037066 ip_port->ip_dl.dl_eth.de_flags= IEF_EMPTY;
0037067 ip_port->ip_dl.dl_eth.de_q_head= NULL;
0037068 ip_port->ip_dl.dl_eth.de_q_tail= NULL;
0037069 ip_port->ip_dl.dl_eth.de_arp_head= NULL;
0037070 ip_port->ip_dl.dl_eth.de_arp_tail= NULL;
0037071 ip_port->ip_dev_main= ipeth_main;
0037072 ip_port->ip_dev_set_ipaddr= ipeth_set_ipaddr;
0037073 ip_port->ip_dev_send= ipeth_send;
0037074 ip_port->ip_mss= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE;
0037075 return 0;
0037076 }
0037077
0037078 PRIVATE void ipeth_main(ip_port)
0037079 ip_port_t *ip_port;
ipeth_main()
ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.
0037080 {
0037081 int result, i;
0037082 ip_fd_t *ip_fd;
0037083
0037084 switch (ip_port->ip_dl.dl_eth.de_state)
0037085 {
0037086 case IES_EMPTY:
0037087 ip_port->ip_dl.dl_eth.de_state= IES_SETPROTO;
0037088
0037089 result= eth_ioctl(ip_port->ip_dl.dl_eth.de_fd, NWIOSETHOPT);
If the ethernet file descriptor was opened by the ip code, the following flags and type will be set:
nweo_flags= NWEO_COPY|NWEO_EN_BROAD|NWEO_EN_MULTI|NWEO_TYPESPEC;
nweo_type= HTONS(ETH_IP_PROTO);
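Note that IES_SETPROTO is a de_state value, not a de_flags value: de_state is set to IES_SETPROTO just before eth_ioctl() is called. If eth_ioctl() succeeds and de_state is still IES_SETPROTO afterward, execution drops through to the IES_SETPROTO case below.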
eth_ioctl()
The actions of eth_ioctl(fd, req) depend on req, eth_ioctl()'s second parameter:
NWIOSETHOPT (NetWork IO Set ETHernet OPTions):
If req is NWIOSETHOPT, eth_ioctl() configures the ethernet file descriptor fd (eth_ioctl()'s first parameter), which can then be used by a higher layer (e.g., ip, arp).
NWIOGETHSTAT (NetWork IO Get ETHernet STATs):
Only the arp code calls eth_ioctl() with the second parameter set to NWIOGETHSTAT. In this case, the ap_ethaddr field of the arp port is set to the ethernet address of the ethernet file descriptor's underlying ethernet port.
0037090 if (result == NW_SUSPEND)
0037091 ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
0037092 if (result<0)
0037093 {
0037094 DBLOCK(1, printf("eth_ioctl(..,%lx)=%d\n",
0037095 NWIOSETHOPT, result));
0037096 return;
0037097 }
0037098 if (ip_port->ip_dl.dl_eth.de_state != IES_SETPROTO)
0037099 return;
0037100 /* drops through */
0037101 case IES_SETPROTO:
0037102 result= arp_set_cb(ip_port->ip_dl.dl_eth.de_port,
0037103 ip_port->ip_port,
0037104 ipeth_arp_reply);
arp_set_cb()
During the initialization of the network service (and, more specifically, the initialization of the ip layer), arp_set_cb(eth_port, ip_port, arp_func) is called to initialize the arp port associated with the ethernet port eth_port, arp_set_cb()'s first parameter. After initializing the arp port, arp_set_cb() calls arp_main().
It is unclear what the "cb" in the function name stands for.
0037105 if (result != NW_OK)
0037106 {
0037107 #if !CRAMPED
0037108 printf("ipeth_main: arp_set_cb failed: %d\n",
0037109 result);
0037110 #endif
0037111 return;
0037112 }
0037113
0037114 /* Wait until the interface is configured up. */
0037115 ip_port->ip_dl.dl_eth.de_state= IES_GETIPADDR;
0037116 if (!(ip_port->ip_flags & IPF_IPADDRSET))
The ip address for the port must have been previously configured. This can be done with the ifconfig utility, which (through ip_ioctl()) sets the IPF_IPADDRSET flag.
0037117 {
0037118 ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
0037119 return;
0037120 }
0037121
0037122 /* fall through */
0037123 case IES_GETIPADDR:
0037124 ip_port->ip_dl.dl_eth.de_state= IES_MAIN;
0037125 for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
0037126 {
0037127 if (!(ip_fd->if_flags & IFF_INUSE))
0037128 {
0037129 continue;
0037130 }
0037131 if (ip_fd->if_port != ip_port)
0037132 {
0037133 continue;
0037134 }
0037135 if (ip_fd->if_flags & IFF_GIPCONF_IP)
0037136 {
0037137 ip_ioctl (i, NWIOGIPCONF);
0037138 }
If one of the upper layers (e.g., udp) was not able to finish its initialization because the underlying ip port did not have an ip address configured, try again. For example, the udp code calls ip_ioctl() for each udp port with an argument of NWIOGIPCONF in order to set the up_ipaddr field of the udp port. If the ip address of the underlying ip port wasn't configured, the call to ip_ioctl() needs to be made again.
0037139 }
0037140 do_eth_read(ip_port);
Process any ethernet packets that have arrived at the ethernet file descriptor that is associated with this ip port.
do_eth_read()
do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.
0037141 return;
0037142 #if !CRAMPED
0037143 default:
0037144 ip_panic(( "unknown state: %d",
0037145 ip_port->ip_dl.dl_eth.de_state));
0037146 #endif
0037147 }
0037148 }
0037149
0037150 PRIVATE acc_t *get_eth_data (fd, offset, count, for_ioctl)
0037151 int fd;
0037152 size_t offset;
0037153 size_t count;
0037154 int for_ioctl;
get_eth_data()
get_eth_data(fd, offset, count, for_ioctl) is (indirectly) called by a number of functions within the ethernet code, including eth_write(). get_eth_data() performs one of several tasks, depending on the state of the ip port and the value of count, get_eth_data()'s third parameter.
If the state of the ip port is IES_MAIN (its state during normal operations) and count is nonzero, get_eth_data() returns the packet from the de_frame field of the ip port. In this way, eth_write() gets the packet from the ip code to send off to eth_send().
If count is zero, get_eth_data() does something different. After eth_write() calls eth_send() (and the ethernet frame is therefore delivered), eth_write() calls the ethernet's reply_thr_get() with count equal to zero. If the ethernet file descriptor was opened up by the ip code, reply_thr_get() is simply a wrapper for get_eth_data(). In this scenario, get_eth_data() sets the ip port's de_frame field to null (since eth_send() just passed this packet to the ethernet driver) and calls ipeth_restart_send() if there are any ethernet packets that the ip code is waiting to send.
If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.
When ipeth_main() calls eth_ioctl() the first time, eth_ioctl() in turn (indirectly) calls get_eth_data() to get the nwio_ethopt struct constructed by get_eth_data().
If count is zero and the ip port's state is IES_SETPROTO, get_eth_data() calls ipeth_main() if additional initialization is necessary.
0037155 {
0037156 ip_port_t *ip_port;
0037157 acc_t *data;
0037158 int result;
0037159
0037160 ip_port= &ip_port_table[fd];
The parameter name "fd" is somewhat unfortunate since fd is not an ip file descriptor but an ip port. More generally, the ef_srfd field of an ethernet file descriptor corresponds to an upper-layer port, not an upper-layer file descriptor.
0037161
0037162 switch (ip_port->ip_dl.dl_eth.de_state)
0037163 {
0037164 case IES_SETPROTO:
0037165 if (!count)
If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.
If count is zero, get_eth_data() calls ipeth_main() if additional initialization is necessary.
0037166 {
0037167 result= (int)offset;
0037168 if (result<0)
0037169 {
0037170 ip_port->ip_dl.dl_eth.de_state= IES_ERROR;
0037171 break;
0037172 }
0037173 if (ip_port->ip_dl.dl_eth.de_flags & IEF_SUSPEND)
0037174 ipeth_main(ip_port);
ipeth_main()
ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.
0037175 return NW_OK;
0037176 }
0037177 assert ((!offset) && (count == sizeof(struct nwio_ethopt)));
If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.
When ipeth_main() calls eth_ioctl() the first time, eth_ioctl() in turn (indirectly) calls get_eth_data() to get the nwio_ethopt struct constructed below.
0037178 {
0037179 struct nwio_ethopt *ethopt;
nwio_ethopt_t
The nwio_ethopt struct is a field within the eth_fd_t struct (ethernet file descriptor) and is also used to configure ethernet file descriptors.
nwio_ethopt_t ef_ethopt:
typedef struct nwio_ethopt
{
u32_t nweo_flags;
ether_addr_t nweo_multi, nweo_rem;
ether_type_t nweo_type;
} nwio_ethopt_t;
nweo_flags:
If the ethernet file descriptor was opened by the ip code, the following flags and type will be set:
nweo_flags= NWEO_COPY|NWEO_EN_BROAD|NWEO_EN_MULTI|NWEO_TYPESPEC;
nweo_type= HTONS(ETH_IP_PROTO);
#define NWEO_NOFLAGS 0x0000L
#define NWEO_ACC_MASK 0x0003L
#define NWEO_EXCL 0x00000001L
#define NWEO_SHARED 0x00000002L
#define NWEO_COPY 0x00000003L
From ip(4):
"If NWEO_SHARED is selected, then multiple channels (which all must select NWEO_SHARED) can use the same Ethernet type and they can all send packets. However, incoming packets will be delivered to at most one of them."
Note that, for whatever reason, NWEO_EXCL behaves exactly as NWEO_COPY. Every ethernet file descriptor so configured receives a copy of an incoming packet.
The access flags are important when an ethernet packet is being read.
#define NWEO_LOC_MASK 0x0010L
#define NWEO_BROAD_MASK 0x0020L
#define NWEO_EN_BROAD 0x00000020L
#define NWEO_DI_BROAD 0x00200000L
NWEO_EN_BROAD enables the receipt of broadcast packets.
#define NWEO_MULTI_MASK 0x0040L
#define NWEO_EN_MULTI 0x00000040L
#define NWEO_DI_MULTI 0x00400000L
NWEO_EN_MULTI enables the receipt of multicast packets. The nweo_multi field does not appear to be used in any meaningful way.
#define NWEO_PROMISC_MASK 0x0080L
#define NWEO_EN_PROMISC 0x00000080L
#define NWEO_DI_PROMISC 0x00800000L
If an ethernet file descriptor is in promiscuous mode, the file descriptor not only accepts any packet regardless of destination ethernet address but can also send out packets with any source ethernet address (not just the ethernet card's address).
If this is not the case, use the ethernet port's ethernet address.
#define NWEO_REM_MASK 0x0100L
#define NWEO_REMSPEC 0x00000100L
#define NWEO_REMANY 0x01000000L
From ip(4):
"NWEO_REMSPEC restricts sending and receiving of packets to the single remote computer specified in the nweo_rem field."
If the NWEO_REMANY flag is set, an ethernet packet may have any destination.
#define NWEO_TYPE_MASK 0x0200L
#define NWEO_TYPESPEC 0x00000200L
#define NWEO_TYPEANY 0x02000000L
From ip(4):
"NWEO_TYPESPEC restricts sending and receiving of packets to the type specified in nweo_type."
If the NWEO_TYPESPEC flag is set, the nweo_type field (see below) may be one of the following:
#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800
#define NWEO_RW_MASK 0x1000L
#define NWEO_RWDATONLY 0x00001000L
#define NWEO_RWDATALL 0x10000000L
From ip(4):
"If the Ethernet header is completely specified by the nweo_flags (i.e.,
all of NWEO_EN_LOC, NWEO_DI_BROAD, NWEO_DI_MULTI, NWEO_DI_PROMISC,
NWEO_REMSPEC and NWEO_TYPESPEC are specified), then NWEO_RWDATONLY can be
used to send and receive only the data part of an Ethernet packet."
The default for the ethernet file descriptors opened by the ip and arp layers is NWEO_RWDATALL.
ether_addr_t nweo_multi:
This field is not used in any meaningful way.
nweo_rem:
Used with the NWEO_REMSPEC flag (see above).
ether_type_t nweo_type:
Used with the NWEO_TYPESPEC flag (see above). The nweo_type field may be one of the following:
#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800
0037180 acc_t *acc;
0037181
0037182 acc= bf_memreq(sizeof(*ethopt));
bf_memreq()
After the buffers have been initialized, accessors[] looks like the following:
bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:
Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.
So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.
For a complete description of the network service's buffer management, click here.
0037183 ethopt= (struct nwio_ethopt *)ptr2acc_data(acc);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037184 ethopt->nweo_flags= NWEO_COPY|NWEO_EN_BROAD|
0037185 NWEO_EN_MULTI|NWEO_TYPESPEC;
0037186 ethopt->nweo_type= HTONS(ETH_IP_PROTO);
0037187 return acc;
0037188 }
0037189
0037190 case IES_MAIN:
0037191 if (!count)
After eth_write() calls eth_send() (and the ethernet frame is therefore delivered), eth_write() calls the ethernet's reply_thr_get() with count equal to zero. If the ethernet file descriptor was opened up by the ip code, reply_thr_get() is simply a wrapper for get_eth_data(). In this scenario, get_eth_data() sets the ip port's de_frame field to null (since eth_send() just passed this packet to the ethernet driver) and calls ipeth_restart_send() if there are any ethernet packets that the ip code is waiting to send.
0037192 {
0037193 result= (int)offset;
0037194 if (result<0)
0037195 ip_warning(( "error on write: %d\n", result ));
The ethernet packet in the ip port's de_frame field was either successfully sent to the ethernet task or there was a problem and it was not possible to send it to the task. In either case, the packet is no longer needed.
0037196 bf_afree (ip_port->ip_dl.dl_eth.de_frame);
The ethernet packet in this field has been delivered to the ethernet task by eth_write_port().
bf_afree()
After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:
Then the resulting chain will be:
bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.
bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).
0037197 ip_port->ip_dl.dl_eth.de_frame= 0;
0037198
0037199 if (ip_port->ip_dl.dl_eth.de_flags & IEF_WRITE_SP)
If a packet could not be delivered to the ethernet task (because a previous packet was already occupying the de_frame field) and was instead queued, the IEF_WRITE_SP flag will be set. Call ipeth_restart_send() to try to send these out.
0037200 {
0037201 ip_port->ip_dl.dl_eth.de_flags &=
0037202 ~IEF_WRITE_SP;
0037203 ipeth_restart_send(ip_port);
ipeth_restart_send()
ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.
ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.
0037204 }
0037205 return NW_OK;
0037206 }
If the state of the ip port is IES_MAIN (its state during normal operations) and count is nonzero, get_eth_data() returns the packet from the de_frame field of the ip port. In this way, eth_write() gets the packet from the ip code to hand off to eth_send(), which calls eth_write_port() (if the packet isn't destined for the loopback address), which hands the ethernet packet off to the ethernet driver.
0037207 data= bf_cut (ip_port->ip_dl.dl_eth.de_frame, offset, count);
bf_cut()
If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:
Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.
If length (the third parameter) is zero, simply duplicate the first accessor in the linked list but set acc_length=0 and acc_next=null. In other words, create a linked list of length one accessor whose acc_length is 0.
bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.
For a full description of the network service's buffer management, click here.
0037208 assert (data);
0037209 return data;
0037210 default:
0037211 #if !CRAMPED
0037212 printf(
0037213 "get_eth_data(%d, 0x%d, 0x%d) called but ip_state=0x%x\n",
0037214 fd, offset, count, ip_port->ip_dl.dl_eth.de_state);
0037215 #endif
0037216 break;
0037217 }
0037218 return 0;
0037219 }
0037220
0037221 PRIVATE int put_eth_data (port, offset, data, for_ioctl)
0037222 int port;
0037223 size_t offset;
0037224 acc_t *data;
0037225 int for_ioctl;
put_eth_data()
put_eth_data(port, offset, data, for_ioctl) is called only by reply_thr_put() with data, put_eth_data()'s third parameter, set to null. If there are no ethernet packets waiting to be delivered to the ip port, put_eth_data() simply clears the IEF_READ_IP flag. If data is null and there are ethernet packets waiting to be delivered to the ip port, put_eth_data() calls do_eth_read() to process the packets.
0037226 {
0037227 ip_port_t *ip_port;
0037228 acc_t *pack;
0037229 int result;
0037230
0037231 ip_port= &ip_port_table[port];
0037232
0037233 assert(0);
0037234
0037235 if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP)
0037236 {
0037237 if (!data)
data, put_eth_data()'s third parameter, will always be null since reply_thr_put() is the only function that calls put_eth_data().
0037238 {
0037239 result= (int)offset;
0037240 if (result<0)
0037241 {
0037242 DBLOCK(1, printf(
0037243 "ip.c: put_eth_data(..,%d,..)\n", result));
0037244 return NW_OK;
0037245 }
0037246 if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_SP)
The IEF_READ_SP flag is set if further ethernet packets are waiting to be handed off to the ip code.
0037247 {
0037248 ip_port->ip_dl.dl_eth.de_flags &=
0037249 ~(IEF_READ_IP|IEF_READ_SP);
0037250 do_eth_read(ip_port);
do_eth_read()
do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.
0037251 }
0037252 else
0037253 ip_port->ip_dl.dl_eth.de_flags &= ~IEF_READ_IP;
No additional ethernet packets are waiting to be handed off to the ip layer so clear the IEF_READ_IP flag.
0037254 return NW_OK;
0037255 }
The code will never reach this point. reply_thr_put() is the only function that calls put_eth_data() and it calls put_eth_data() with its third argument (data) set to null.
0037256 assert (!offset);
0037257 /* Warning: the above assertion is illegal; puts and
0037258 gets of data can be brokenup in any piece the server
0037259 likes. However we assume that the server is eth.c
0037260 and it transfers only whole packets. */
0037261 ip_eth_arrived(port, data, bf_bufsize(data));
ip_eth_arrived()
ip_eth_arrived() is called by the ethernet code (e.g., packet2user()) to hand off a packet to the ip code. ip_eth_arrived() strips off the ethernet header before handing the packet off to ip_arrived() (if the packet is not an ethernet broadcast packet) or ip_arrived_broadcast() (if it is).
0037262 return NW_OK;
0037263 }
0037264 #if !CRAMPED
0037265 printf("ip_port->ip_dl.dl_eth.de_state= 0x%x",
0037266 ip_port->ip_dl.dl_eth.de_state);
0037267 ip_panic (( "strange status" ));
0037268 #endif
0037269 }
0037270
0037271 PRIVATE void ipeth_set_ipaddr(ip_port)
0037272 ip_port_t *ip_port;
ipeth_set_ipaddr()
ipeth_set_ipaddr() calls arp_set_ipaddr() to set the ap_ipaddr field of the ip port's associated arp port. If the ip port has not finished initializing, ipeth_main() is called to finish this initialization.
0037273 {
0037274 arp_set_ipaddr (ip_port->ip_dl.dl_eth.de_port, ip_port->ip_ipaddr);
arp_set_ipaddr()
arp_set_ipaddr(eth_port, ipaddr) is called only from ipeth_set_ipaddr(), which is (indirectly) called by ip_ioctl().
arp_set_ipaddr() simply sets the ap_ipaddr field of an arp port whose index within the arp_port_table[] is eth_port, arp_set_ipaddr()'s first parameter, to the ip address ipaddr, arp_set_ipaddr()'s second parameter.
0037275 if (ip_port->ip_dl.dl_eth.de_state == IES_GETIPADDR)
0037276 ipeth_main(ip_port);
ipeth_main()
ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.
0037277 }
0037278
0037279 PRIVATE int ipeth_send(ip_port, dest, pack, broadcast)
0037280 struct ip_port *ip_port;
0037281 ipaddr_t dest;
0037282 acc_t *pack;
0037283 int broadcast;
ipeth_send()
ipeth_send() is called (indirectly) by ip_send() to send out a packet to a destination address on the same subnet as the ip port from which it is sent or to send out a packet to a broadcast address. ipeth_send() first creates an ethernet header to prepend to the ip packet and then, if there are no packets waiting to be sent out, calls eth_send() in an attempt to send the packet to the ethernet task immediately. If eth_send() is not able to send the ethernet packet immediately, eth_write() is called to queue the packet. If there are already ethernet packets waiting to be sent out, eth_send() and eth_write() are not called and the packet is queued (i.e., placed in the de_q_head queue of the ip port).
udp write path
For a write to a udp device (e.g., /dev/udp), the code takes the following path:
sr_rwio()
udp_write()
restart_write_fd()
ip_write()
ip_send()
if (packet is destined to a system on the local ethernet network) {
ipeth_send()
if (no previous packet being processed by ethernet task)
eth_send()
if (eth_send() can't immediately send packet)
eth_write()
}
else if (packet must be routed)
oroute_frag()
else if (packet is destined for a local destination)
ev_enqueue()
0037284 {
0037285 int r;
0037286 acc_t *eth_pack, *tail;
0037287 size_t pack_size;
0037288 eth_hdr_t *eth_hdr;
0037289 xmit_hdr_t *xmit_hdr;
0037290 ipaddr_t hostpart;
0037291 time_t t;
0037292
0037293 /* Start optimistic: the arp will succeed without blocking and the
0037294 * ethernet packet can be sent without blocking also. Start with
0037295 * the allocation of the ethernet header.
0037296 */
Address Resolution Protocol (ARP)
ARP Protocol Overview
The Address Resolution Protocol (ARP), documented in RFC 826, translates a system's 32-bit IP address to its corresponding 48-bit Ethernet address.
ARP works by broadcasting a packet to all hosts attached to an Ethernet segment. The packet contains the ip address of the system with which the sender wishes to communicate. Only the host with this ip address answers the packet, sending its ethernet address in the response.
Hosts typically keep a cache of ARP responses (called the ARP table), based on the assumption that ip-to-hardware address mappings rarely change.
0037297 eth_pack= bf_memreq(sizeof(*eth_hdr));
The allocated accessor will hold the ethernet header.
bf_memreq()
After the buffers have been initialized, accessors[] looks like the following:
bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:
Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.
So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.
For a complete description of the network service's buffer management, click here.
0037298 assert(eth_pack->acc_next == NULL);
0037299 eth_pack->acc_next= pack;
The ethernet header is prepended to the data (which generally consists of an ip header, a udp or tcp header, and the payload).
0037300 pack_size= bf_bufsize(eth_pack);
0037301 if (pack_size<ETH_MIN_PACK_SIZE)
0037302 {
0037303 tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
bf_memreq()
After the buffers have been initialized, accessors[] looks like the following:
bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:
Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.
So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.
For a complete description of the network service's buffer management, click here.
0037304 eth_pack= bf_append(eth_pack, tail);
bf_append()
bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):
the resulting linked list is as follows:
0037305 }
0037306 eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
eth_hdr_t
An ethernet header is fairly simple. The eth_hdr_t typedef is declared in server/ip/gen/eth_hdr.h:
typedef struct eth_hdr
{
ether_addr_t eh_dst;
ether_addr_t eh_src;
ether_type_t eh_proto;
} eth_hdr_t;
ether_addr_t eh_dst: The destination ethernet address.
ether_addr_t eh_src: The source ethernet address.
ether_type_t eh_proto: The protocol of the layer above. The three possibilities are:
#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800
An ethernet frame also has a CRC (Cyclic Redundancy Check) at its end to enable the receiving system to determine if corruption occurred during transit.
An ethernet MAC (physical) address is a 48 bit number. This number is broken down into two halves: 22 of the first 24 bits identify the vendor of the Ethernet board (called the "Organizationally Unique Identifier") and the second 24 bits form a serial number assigned by the vendor. This guarantees that no two Ethernet cards have the same MAC address. One of the remaining bits indicates if the packet is a multicast or broadcast packet and the other is used for vendor-specific applications (e.g., NetBEUI).
+--+--+--+--+--+--+
| destination MAC |
+--+--+--+--+--+--+
| source MAC |
+--+--+--+--+--+--+
|08 00|
+--+--+-----------+
| |
. IP .
. packet .
. .
| |
+--+--+--+--+-----+
| CRC |
+--+--+--+--+
0037307
0037308 /* Lookup the ethernet address */
0037309 if (broadcast)
0037310 eth_hdr->eh_dst= broadcast_ethaddr;
The variable broadcast_ethaddr is set to { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } on line 37030. This is the ethernet broadcast address (not to be confused with an ip broadcast address).
0037311 else
0037312 {
0037313 if ((dest & ip_port->ip_subnetmask) !=
0037314 (ip_port->ip_ipaddr & ip_port->ip_subnetmask))
Verify that the destination ip address is in the same subnet as the ip address of the ip port. For example, if 192.168.1.1/255.255.255.0 is the ip address of the ip port and 192.168.1.2/255.255.255.0 is the destination ip address:
(192.168.1.1 & 255.255.255.0) != (192.168.1.2 & 255.255.255.0)
192.168.1.0 != 192.168.1.0
This is not a true statement. Therefore, ip_panic() would not be called.
0037315 {
0037316 #if !CRAMPED
0037317 ip_panic(( "invalid destination" ));
0037318 #endif
0037319 }
0037320
0037321 hostpart= (dest & ~ip_port->ip_subnetmask);
An ip address is made up of a network part and a host part. For example, for the ip address/subnet pair 192.168.1.60/255.255.255.0, the network part is 192.168.1.0 and the host part is 60.
192.168.1.60 & ~(255.255.255.0) = 192.168.1.60 & 0.0.0.255 = 60
0037322
0037323 assert(hostpart != 0);
0037324 assert(dest != ip_port->ip_ipaddr);
0037325
0037326 r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
0037327 dest, &eth_hdr->eh_dst);
arp_ip_eth()
arp_ip_eth(eth_port, ipaddr, ethaddr) looks for an entry in the arp table that matches ipaddr, the second parameter, and if it finds it, returns the corresponding ethernet address in ethaddr, the third parameter. If arp_ip_eth() does not find a valid entry in the arp table for the ip address, it sends out an arp broadcast in an attempt to find the ethernet address for the ip address and returns NW_SUSPEND.
0037328 if (r == NW_SUSPEND)
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037329 {
0037330 /* Unfortunately, the arp takes some time, use
0037331 * the ethernet header to store the next hop
0037332 * ip address and the current time.
0037333 */
0037334 xmit_hdr= (xmit_hdr_t *)eth_hdr;
Since the ethernet header that we allocated on line 37297 will not immediately be used to hold an ethernet header, it can be used initially as a xmit_hdr.
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037335 xmit_hdr->xh_time= get_time();
get_time()
get_time() returns the number of clock ticks since reboot.
Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.
0037336 xmit_hdr->xh_ipaddr= dest;
0037337 eth_pack->acc_ext_link= NULL;
Set acc_ext_link to NULL since the packet will be the last in the de_arp_head/de_arp_tail queue.
On lines 37338 - 37345, the packet is placed in the de_arp_head/de_arp_tail queue.
0037338 if (ip_port->ip_dl.dl_eth.de_arp_head == NULL)
0037339 ip_port->ip_dl.dl_eth.de_arp_head= eth_pack;
0037340 else
0037341 {
0037342 ip_port->ip_dl.dl_eth.de_arp_tail->
0037343 acc_ext_link= eth_pack;
0037344 }
0037345 ip_port->ip_dl.dl_eth.de_arp_tail= eth_pack;
0037346 return NW_OK;
0037347 }
0037348 if (r == EDSTNOTRCH)
EDSTNOTRCH is #define'd in /include/errno.h:
#define EDSTNOTRCH (_SIGN 56) /* destination not reachable */
0037349 {
0037350 bf_afree(eth_pack);
0037351 return EDSTNOTRCH;
0037352 }
0037353 assert(r == NW_OK);
0037354 }
Regardless of whether the packet is a broadcast packet, the code continues from here. At this point the destination address of the ethernet header has been filled in, either with the ethernet broadcast address or with the result of a successful ARP lookup.
0037355
0037356 /* If we have no write in progress, we can try to send the ethernet
0037357 * packet using eth_send. If the IP packet is larger than mss,
0037358 * unqueue the packet and let ipeth_restart_send deal with it.
0037359 */
0037360 pack_size= bf_bufsize(eth_pack);
bf_bufsize()
bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).
For a detailed description of the network service's buffer management, click here.
0037361 if (ip_port->ip_dl.dl_eth.de_frame == NULL && pack_size <=
0037362 ip_port->ip_mss + sizeof(*eth_hdr))
If there is no ethernet packet in the de_frame field waiting to be sent out (i.e., de_frame == NULL) and the size of the packet is smaller than or equal to the maximum size, send the packet out.
If the size of the ethernet packet is too large, ipeth_restart_send() is called (see lines 37400-37401) to split up the ethernet packet into two smaller packets and then attempt to send out the smaller packets.
0037363 {
0037364 r= eth_send(ip_port->ip_dl.dl_eth.de_fd,
0037365 eth_pack, pack_size);
eth_send()
eth_send() does a couple of checks and sets some of the fields of the ethernet header before passing the packet off to ev_enqueue() (if the packet is destined for the local loopback) or eth_write_port() (if it is not).
0037366 if (r == NW_OK)
0037367 return NW_OK;
0037368
Blocking in eth_send() occurs if the write queue for the ethernet port is not empty.
The queueing of packets waiting to be sent out an ip port is complicated. Click here for a detailed explanation.
0037369 /* A non-blocking send is not possible, start a regular
0037370 * send.
0037371 */
0037372 assert(r == NW_WOULDBLOCK);
0037373 ip_port->ip_dl.dl_eth.de_frame= eth_pack;
0037374 r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
This call to eth_write() will return NW_SUSPEND since there is still a packet in the ethernet file descriptor's etp_wr_pack field. The ethernet file descriptor's EPF_MORE2WRITE flag will be set and etp_write_count will be set to pack_size.
0037375 if (r == NW_SUSPEND)
0037376 {
0037377 assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037378 IEF_WRITE_SP));
0037379 ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
0037380 }
0037381 assert(r == NW_OK || r == NW_SUSPEND);
0037382 return NW_OK;
0037383 }
0037384
0037385 /* Enqueue the packet, and store the current time, in the
0037386 * room for the ethernet source address.
0037387 */
0037388 t= get_time();
get_time()
get_time() returns the number of clock ticks since reboot.
Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.
0037389 assert(sizeof(t) <= sizeof(eth_hdr->eh_src));
0037390 memcpy(&eth_hdr->eh_src, &t, sizeof(t));
As the comment above (lines 37385-37387) says, the time returned by get_time() is copied into the source address field of the ethernet header. This value is extracted and analyzed in ipeth_restart_send() on line 37456.
This value represents the time that the packet was enqueued (which happens in the next lines). The packet will be dropped if the ttl (e.g., for udp, the ttl is half a second) expires before the packet is sent off.
0037391
Lines 37392-37399 place the ethernet packet at the appropriate place in the linked list of ethernet packets waiting to be sent out the ip port.
The queueing of packets waiting to be sent out an ip port is complicated. Click here for a detailed explanation.
0037392 eth_pack->acc_ext_link= NULL;
0037393 if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037394 ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037395 else
0037396 {
0037397 ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link= eth_pack;
0037398 }
0037399 ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037400 if (ip_port->ip_dl.dl_eth.de_frame == NULL)
0037401 ipeth_restart_send(ip_port);
There will be no packet in the de_frame field if and only if the ethernet packet eth_pack was too large to be immediately sent out (see lines 37361-37362).
ipeth_restart_send()
ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.
ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.
0037402 return NW_OK;
0037403 }
0037404
0037405 PRIVATE void ipeth_restart_send(ip_port)
0037406 ip_port_t *ip_port;
ipeth_restart_send()
ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.
ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.
0037407 {
0037408 time_t now, enq_time;
0037409 int r;
0037410 acc_t *eth_pack, *ip_pack, *next_eth_pack, *next_part, *tail;
0037411 size_t pack_size;
0037412 eth_hdr_t *eth_hdr, *next_eth_hdr;
0037413
0037414 now= get_time();
get_time()
get_time() returns the number of clock ticks since reboot.
Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.
0037415
0037416 while (ip_port->ip_dl.dl_eth.de_q_head != NULL)
Go through the ip port's linked list of ethernet packets that are waiting to be sent out.
The queueing of packets that are waiting to be sent out an ip port is complicated. Click here for a detailed explanation.
0037417 {
0037418 eth_pack= ip_port->ip_dl.dl_eth.de_q_head;
0037419 ip_port->ip_dl.dl_eth.de_q_head= eth_pack->acc_ext_link;
0037420
0037421 eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
eth_hdr_t
An ethernet header is fairly simple. The eth_hdr_t typedef is declared in server/ip/gen/eth_hdr.h:
typedef struct eth_hdr
{
ether_addr_t eh_dst;
ether_addr_t eh_src;
ether_type_t eh_proto;
} eth_hdr_t;
ether_addr_t eh_dst: The destination ethernet address.
ether_addr_t eh_src: The source ethernet address.
ether_type_t eh_proto: The protocol of the layer above. The three possibilities are:
#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800
An ethernet frame also has a CRC (Cyclic Redundancy Check) at its end to enable the receiving system to determine if corruption occurred during transit.
An ethernet MAC (physical) address is a 48 bit number. This number is broken down into two halves: 22 of the first 24 bits identify the vendor of the Ethernet board (called the "Organizationally Unique Identifier") and the second 24 bits form a serial number assigned by the vendor. This guarantees that no two Ethernet cards have the same MAC address. One of the remaining bits indicates if the packet is a multicast or broadcast packet and the other is used for vendor-specific applications (e.g., NetBEUI).
+--+--+--+--+--+--+
| destination MAC |
+--+--+--+--+--+--+
| source MAC |
+--+--+--+--+--+--+
|08 00|
+--+--+-----------+
| |
. IP .
. packet .
. .
| |
+--+--+--+--+-----+
| CRC |
+--+--+--+--+
0037422
0037423 pack_size= bf_bufsize(eth_pack);
bf_bufsize()
bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).
For a detailed description of the network service's buffer management, click here.
0037424
0037425 if (pack_size > ip_port->ip_mss+sizeof(*eth_hdr))
If the ethernet packet is larger than the allowable size (i.e., larger than the mss field plus an ethernet header), break the ethernet packet into two packets. This is accomplished by splitting the encapsulated ip packet into two fragments, creating another ethernet header, and then encapsulating the second ip fragment with the newly created ethernet header.
0037426 {
0037427 /* Split the IP packet */
0037428 ip_pack= eth_pack->acc_next;
0037429 next_part= ip_pack;
0037430 ip_pack= ip_split_pack(ip_port, &next_part,
0037431 ip_port->ip_mss);
ip_split_pack()
ip_split_pack(ip_port, ref_last, first_size) is called by ipeth_restart_send() to split up an ip packet into fragments if the packet is too large (i.e., the packet is greater than the maximum ethernet packet size without the header (1514-14 bytes)). ip_split_pack() returns a reference to the first fragment and returns a reference to the second fragment in ref_last, ip_split_pack()'s second parameter. ref_last is also used to pass in the packet that is to be split.
0037432 if (ip_pack == NULL)
0037433 {
0037434 bf_afree(eth_pack);
bf_afree()
After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:
Then the resulting chain will be:
bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.
bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).
0037435 continue;
0037436 }
0037437
Lines 37438-37452 create a new ethernet header, encapsulate the second fragment of the newly split ip packet with the new ethernet header, and place the new ethernet packet in the linked list of the ip port's ethernet packets waiting to be sent out.
0037438 /* Allocate new ethernet header */
0037439 next_eth_pack= bf_memreq(sizeof(*next_eth_hdr));
bf_memreq()
After the buffers have been initialized, accessors[] looks like the following:
bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:
Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.
So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.
For a complete description of the network service's buffer management, click here.
0037440 next_eth_hdr= (eth_hdr_t *)ptr2acc_data(next_eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037441 *next_eth_hdr= *eth_hdr;
0037442 next_eth_pack->acc_next= next_part;
The ethernet header for the second ip fragment will be the same as the ethernet header for the first ip fragment.
0037443
Place the newly created ethernet packet in the ip port's linked list of ethernet packets waiting to be sent out.
0037444 next_eth_pack->acc_ext_link= NULL;
0037445 if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037446 ip_port->ip_dl.dl_eth.de_q_head= next_eth_pack;
0037447 else
0037448 ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037449 next_eth_pack;
0037450 ip_port->ip_dl.dl_eth.de_q_tail= next_eth_pack;
0037451
0037452 eth_pack->acc_next= ip_pack;
0037453 pack_size= bf_bufsize(eth_pack);
Calculate the new size of the first ethernet packet (not the ethernet packet just created). This value is used in eth_send() (see line 37477).
bf_bufsize()
bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).
For a detailed description of the network service's buffer management, click here.
0037454 }
0037455
enq_time is the time at which the ethernet packet was queued; it was stored in the source field of the ethernet header on line 37390. If more than a second has elapsed since the ethernet packet was placed in the queue, update the ttl of the encapsulated ip packet and the checksum of its ip header. If the ttl has expired, discard the packet.
HZ is #define'd in include/minix/const.h:
#define HZ 60 /* clock freq (software settable on IBM-PC) */
0037456 memcpy(&enq_time, &eth_hdr->eh_src, sizeof(enq_time));
0037457 if (enq_time + HZ < now)
0037458 {
0037459 r= ipeth_update_ttl(enq_time, now, eth_pack);
ipeth_update_ttl()
ipeth_update_ttl(enq_time, now, eth_pack) adjusts the ttl of the encapsulated ip packet's ip header of ethernet packet eth_pack, the third parameter of ipeth_update_ttl(), and recalculates the checksum of the ip header to reflect this change.
If the ip packet's ttl has already expired, ipeth_update_ttl() returns ETIMEDOUT.
0037460 if (r == ETIMEDOUT)
Discard the packet and free up the packet's buffers if the ttl of the ethernet packet's encapsulated ip packet has expired.
ETIMEDOUT is #define'd in /include/errno.h:
#define ETIMEDOUT (_SIGN 61) /* connection timed out */
0037461 {
0037462 ip_warning(( "should send ICMP ttl exceded" ));
0037463 bf_afree(eth_pack);
bf_afree()
After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:
Then the resulting chain will be:
bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.
bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).
0037464 continue;
0037465 }
0037466 assert(r == NW_OK);
0037467 }
0037468
An ethernet packet must be at least ETH_MIN_PACK_SIZE (#define'd as 60 in include/net/gen/ether.h) bytes. If not, add some zeroes onto the end of the packet.
0037469 if (pack_size<ETH_MIN_PACK_SIZE)
0037470 {
0037471 tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
bf_memreq()
After the buffers have been initialized, accessors[] looks like the following:
bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:
Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.
So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.
For a complete description of the network service's buffer management, click here.
0037472 eth_pack= bf_append(eth_pack, tail);
bf_append()
bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):
the resulting linked list is as follows:
0037473 }
0037474
0037475 assert(ip_port->ip_dl.dl_eth.de_frame == NULL);
0037476
0037477 r= eth_send(ip_port->ip_dl.dl_eth.de_fd, eth_pack, pack_size);
eth_send()
eth_send() does a couple of checks and sets some of the fields of the ethernet header before passing the packet off to ev_enqueue() (if the packet is destined for the local loopback) or eth_write_port() (if it is not).
0037478 if (r == NW_OK)
0037479 continue;
0037480
0037481 /* A non-blocking send is not possible, start a regular
0037482 * send.
0037483 */
The ethernet task is already trying to send out a packet. Place the packet in the de_frame field of the ip port.
The queueing of packets that are waiting to be sent out an ip port is complicated. Click here for a detailed explanation.
0037484 assert(r == NW_WOULDBLOCK);
0037485 ip_port->ip_dl.dl_eth.de_frame= eth_pack;
0037486 r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
eth_write()
If a few tests (e.g., a test to determine if the ethernet packet is either too large or too small) have positive results and the ethernet task is not attempting to send an ethernet packet (i.e., etp_wr_pack is null) and the packet is coming from the ip code, eth_write(fd, count) passes the ethernet packet stored in the dl_eth.de_frame field of the ip port associated with the ethernet file descriptor fd, eth_write()'s first parameter, to eth_send().
If the packet is coming from the arp code (i.e., an arp-request or an arp-reply is being sent out), eth_write() calls arp_getdata() to create the ethernet packet before passing the newly created packet off to eth_send().
If the ethernet task is attempting to send an ethernet packet, eth_write() sets the ethernet port's EPF_MORE2WRITE flag and returns NW_SUSPEND.
0037487 if (r == NW_SUSPEND)
As described above, eth_write() returns NW_SUSPEND if the ethernet task is still delivering a previous packet.
The IEF_WRITE_SP flag is eventually cleared by get_eth_data() on line 37200. ipeth_restart_send() is then called to attempt to send out the packets waiting to be sent out.
0037488 {
0037489 assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037490 IEF_WRITE_SP));
0037491 ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
0037492 return;
0037493 }
0037494 assert(r == NW_OK);
0037495 }
0037496 }
0037497
0037498
0037499 PRIVATE void ipeth_arp_reply(ip_port_nr, ipaddr, eth_addr)
0037500 int ip_port_nr;
0037501 ipaddr_t ipaddr;
0037502 ether_addr_t *eth_addr;
ipeth_arp_reply()
ipeth_arp_reply() is called (indirectly) by client_reply() under one of the following circumstances:
1) An arp-reply packet has been received in response to a previous arp-request packet that this system sent out.
2) An arp-request packet has timed out. In this case, eth_addr, ipeth_arp_reply()'s third parameter, will be NULL.
3) An arp-request packet has been received that contains the information requested by a previous arp-request packet that this system sent out.
ipeth_arp_reply() searches the queue of ethernet packets waiting for arp resolution for the ip address of the arp-request/arp-reply. If the arp resolution timed out, the packet is discarded. If the arp resolution was successful, the ethernet packet is moved to the queue of packets waiting to be sent out and ipeth_restart_send() is called to send out the packets.
0037503 {
0037504 acc_t *prev, *eth_pack;
0037505 int r;
0037506 xmit_hdr_t *xmit_hdr;
0037507 ip_port_t *ip_port;
0037508 time_t t;
0037509 eth_hdr_t *eth_hdr;
0037510 ether_addr_t tmp_eth_addr;
0037511
0037512 assert (ip_port_nr >= 0 && ip_port_nr < ip_conf_nr);
0037513 ip_port= &ip_port_table[ip_port_nr];
0037514
0037515 for (;;)
0037516 {
0037517 for (prev= 0, eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
0037518 eth_pack;
0037519 prev= eth_pack, eth_pack= eth_pack->acc_ext_link)
ipaddr, ipeth_arp_reply()'s second parameter, is the ip address whose corresponding ethernet address has been resolved or is the target ip address of an arp-request that has just timed out. Find all of the ethernet packets in the de_arp_head/de_arp_tail queue whose destination is this ip address and place the ethernet packet in the queue of outgoing ethernet packets (i.e., the de_q_head/de_q_tail queue) if the arp-request did not time out and discard the packet if the arp-request did time out.
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037520 {
0037521 xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037522 if (xmit_hdr->xh_ipaddr == ipaddr)
0037523 break;
0037524 }
0037525
0037526 if (eth_pack == NULL)
0037527 {
0037528 /* No packet found. */
0037529 break;
0037530 }
0037531
0037532 /* Delete packet from the queue. */
The arp-request has been either answered (perhaps through an arp-request received from another system) or has timed out. Remove the ethernet packet from the de_arp_head/de_arp_tail queue.
0037533 if (prev == NULL)
0037534 {
0037535 ip_port->ip_dl.dl_eth.de_arp_head=
0037536 eth_pack->acc_ext_link;
0037537 }
0037538 else
0037539 {
0037540 prev->acc_ext_link= eth_pack->acc_ext_link;
0037541 if (prev->acc_ext_link == NULL)
0037542 ip_port->ip_dl.dl_eth.de_arp_tail= prev;
0037543 }
0037544
0037545 if (eth_addr == NULL)
0037546 {
0037547 /* Destination is unreachable, delete packet. */
0037548 bf_afree(eth_pack);
bf_afree()
After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:
Then the resulting chain will be:
bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.
bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).
0037549 continue;
0037550 }
0037551
0037552 /* Fill in the ethernet address and put the packet on the
0037553 * transmit queue.
0037554 */
The ethernet packet was removed from the de_arp_head/de_arp_tail queue. Now that the destination ethernet address is known, place the ethernet packet on the queue of packets waiting to be sent out (i.e., the de_q_head/de_q_tail queue).
0037555 t= xmit_hdr->xh_time;
0037556 eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037557 eth_hdr->eh_dst= *eth_addr;
The destination ethernet address was acquired by either an arp-reply (the usual case) or an arp-request.
0037558 memcpy(&eth_hdr->eh_src, &t, sizeof(t));
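Until the packet is actually sent out (at which point the real source ethernet address is placed in eh_src), the eh_src field temporarily holds the time at which the packet was queued. The sending code reads this time back (see line 37456) to determine whether the packet has waited too long.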
0037559
0037560 eth_pack->acc_ext_link= NULL;
0037561 if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037562 ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037563 else
0037564 {
0037565 ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037566 eth_pack;
0037567 }
0037568 ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037569 }
0037570
0037571 /* Try to get some more ARPs in progress. */
This while loop calls arp_ip_eth() for the packet at the head of the de_arp_head/de_arp_tail queue in an attempt to resolve its destination ip address. The loop is exited when arp_ip_eth() returns NW_SUSPEND, which will likely happen on the first call. arp_ip_eth() returns something other than NW_SUSPEND only when a matching entry is found in the arp table (NW_OK) or when the destination is reported as unreachable (EDSTNOTRCH).
0037572 while (ip_port->ip_dl.dl_eth.de_arp_head)
0037573 {
0037574 eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
0037575 xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037576 r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
0037577 xmit_hdr->xh_ipaddr, &tmp_eth_addr);
arp_ip_eth()
arp_ip_eth(eth_port, ipaddr, ethaddr) looks for an entry in the arp table that matches ipaddr, the second parameter, and if it finds it, returns the corresponding ethernet address in ethaddr, the third parameter. If arp_ip_eth() does not find a valid entry in the arp table for the ip address, it sends out an arp broadcast in an attempt to find the ethernet address for the ip address and returns NW_SUSPEND.
0037578 if (r == NW_SUSPEND)
0037579 break; /* Normal case */
0037580
0037581 /* Dequeue the packet */
0037582 ip_port->ip_dl.dl_eth.de_arp_head= eth_pack->acc_ext_link;
If the code reaches this point, either the destination was declared unreachable or an ethernet address for the ip address has been found. Adjust the de_arp_head queue accordingly.
0037583
0037584 if (r == EDSTNOTRCH)
0037585 {
0037586 bf_afree(eth_pack);
bf_afree()
After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:
Then the resulting chain will be:
bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.
bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).
0037587 continue;
0037588 }
0037589 assert(r == NW_OK);
0037590
0037591 /* Fill in the ethernet address and put the packet on the
0037592 * transmit queue.
0037593 */
As was done above (lines 37555-37568), place the ethernet packet on the queue of packets waiting to be sent out (i.e., the de_q_head/de_q_tail queue).
0037594 t= xmit_hdr->xh_time;
xmit_hdr_t
If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).
xmit_hdr is declared in generic/ip_eth.c:
typedef struct xmit_hdr
{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.
0037595 eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037596 eth_hdr->eh_dst= tmp_eth_addr;
0037597 memcpy(&eth_hdr->eh_src, &t, sizeof(t));
0037598
0037599 eth_pack->acc_ext_link= NULL;
0037600 if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037601 ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037602 else
0037603 {
0037604 ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037605 eth_pack;
0037606 }
0037607 ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037608 }
0037609
0037610 /* Restart sending ethernet packets. */
0037611 if (ip_port->ip_dl.dl_eth.de_frame == NULL)
0037612 ipeth_restart_send(ip_port);
ipeth_restart_send()
ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.
ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.
0037613 }
0037614
0037615 PRIVATE int ipeth_update_ttl(enq_time, now, eth_pack)
0037616 time_t enq_time;
0037617 time_t now;
0037618 acc_t *eth_pack;
ipeth_update_ttl()
ipeth_update_ttl(enq_time, now, eth_pack) adjusts the ttl of the encapsulated ip packet's ip header of ethernet packet eth_pack, the third parameter of ipeth_update_ttl(), and recalculates the checksum of the ip header to reflect this change.
If the ip packet's ttl has already expired, ipeth_update_ttl() returns ETIMEDOUT.
0037619 {
0037620 int ttl_diff;
0037621 ip_hdr_t *ip_hdr;
0037622 u32_t sum;
0037623 u16_t word;
0037624 acc_t *ip_pack;
0037625
0037626 ttl_diff= (now-enq_time)/HZ;
0037627 enq_time += ttl_diff*HZ;
0037628 assert(enq_time <= now && enq_time + HZ > now);
0037629
Lines 37630-37635 extract the encapsulated ip packet's ip header so that the header's fields can be analyzed and altered.
0037630 ip_pack= eth_pack->acc_next;
0037631 assert(ip_pack->acc_length >= sizeof(*ip_hdr));
0037632 assert(ip_pack->acc_linkC == 1 &&
0037633 ip_pack->acc_buffer->buf_linkC == 1);
0037634
0037635 ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_pack);
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037636 if (ip_hdr->ih_ttl <= ttl_diff)
0037637 return ETIMEDOUT;
Determine if the ip header's ttl has expired while in the queue.
The ttl is a somewhat tricky field. Let's say that a udp packet is being sent out of an ethernet interface and that this udp packet (which was, obviously, encapsulated by an ip header and an ethernet header) was enqueued for 5 seconds before it was ready to be sent out. The default ttl for udp packets is 30 (UDP_TTL); therefore, this packet will now have a ttl of 25. After the packet is sent out of the ethernet interface, each router that encounters the packet decrements the ttl field. If the packet has traversed 25 routers and still hasn't reached its destination, this 25th router will drop the packet.
As the example above shows, the ttl field can be both an upper-limit on time and an upper-limit on hop-count. The ttl field in this instance is an upper-limit on time.
Note that the meaning of the ttl field (according to various RFC's) has changed. Initially, the ttl field was an upper-limit on the time spent in transit. The meaning of the ttl field was later changed by the IETF developers to be the upper-limit on the hops needed to reach the destination. In the Minix network service code, the ttl can have both meanings, which adds to the confusion. For example, if the network service receives a packet on one interface and then has to route the packet out another interface, the ttl field is decremented (in other words, the ttl is regarded as a hop-count).
0037638 sum= (u16_t)~ip_hdr->ih_hdr_chk;
0037639 word= *(u16_t *)&ip_hdr->ih_ttl;
0037640 if (word > sum)
0037641 sum += 0xffff - word;
0037642 else
0037643 sum -= word;
0037644 ip_hdr->ih_ttl -= ttl_diff;
0037645 word= *(u16_t *)&ip_hdr->ih_ttl;
0037646 sum += word;
0037647 if (sum > 0xffff)
0037648 sum -= 0xffff;
0037649 assert(!(sum & 0xffff0000));
0037650 ip_hdr->ih_hdr_chk= ~sum;
0037651 assert(ip_hdr->ih_ttl > 0);
0037652 return NW_OK;
0037653 }
0037654
0037655 PRIVATE void do_eth_read(ip_port)
do_eth_read()
do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.
0037656 ip_port_t *ip_port;
0037657 {
0037658 int result;
0037659
0037660 assert(!(ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP));
0037661
0037662 for (;;)
0037663 {
0037664 ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_IP;
0037665
0037666 result= eth_read (ip_port->ip_dl.dl_eth.de_fd,
0037667 ETH_MAX_PACK_SIZE);
eth_read()
eth_read() attempts to deliver all of the ethernet packets in an ethernet file descriptor's read queue to its associated ip port or arp port or sr file descriptor and returns NW_SUSPEND when there are no more ethernet packets to deliver.
0037668 if (result == NW_SUSPEND)
eth_read() returns NW_SUSPEND if there isn't a packet waiting or the packet has expired. The IEF_READ_SP flag is eventually cleared by put_eth_data() (see lines 37248-37249).
0037669 {
0037670 assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037671 IEF_READ_SP));
0037672 ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_SP;
0037673 return;
0037674 }
If this point in the code is reached, eth_read() successfully processed the packet. Clear the IEF_READ_IP flag.
0037675 ip_port->ip_dl.dl_eth.de_flags &= ~IEF_READ_IP;
0037676 if (result<0)
eth_read() either returns NW_OK (if a packet was successfully processed or there was a problem with the packet) or NW_SUSPEND (if there was no packet waiting). Since NW_SUSPEND was handled above, at this point in the code, the variable result will never be negative.
0037677 {
0037678 return;
0037679 }
0037680 }
0037681 }
0037682
0037683 PRIVATE void ip_eth_arrived(port, pack, pack_size)
0037684 int port;
0037685 acc_t *pack;
0037686 size_t pack_size;
ip_eth_arrived()
ip_eth_arrived() is called by the ethernet code (e.g., packet2user()) to hand off a packet to the ip code. ip_eth_arrived() strips off the ethernet header before handing the packet off to ip_arrived() (if the packet is not an ethernet broadcast packet) or ip_arrived_broadcast() (if it is).
udp read path
eth_arrive()
ip_eth_arrived()
if (unicast packet)
ip_arrived()
else if (ethernet broadcast packet)
ip_arrived_broadcast()
if (packet must be input routed)
hand off packet to destination ip port
else
ip_port_arrive() {
packet2user()
udp_ip_arrived()
}
0037687 {
0037688 int broadcast;
0037689 ip_port_t *ip_port;
0037690
0037691 ip_port= &ip_port_table[port];
0037692 broadcast= (*(u8_t *)ptr2acc_data(pack) & 1);
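The ethernet broadcast address is ff:ff:ff:ff:ff:ff. The broadcast address is a special case of a multicast address, which has the low-order bit of its first byte set (e.g., 01:00:00:00:00:00). Line 37692 tests only this bit of the first byte of the packet (the first byte of the destination ethernet address), so any multicast packet, not just one sent to ff:ff:ff:ff:ff:ff, is handed to ip_arrived_broadcast().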
ptr2acc_data()
The macro ptr2acc_data is #define'd in inet/generic/buf.h as:
#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))
ptr2acc_data() simply returns a pointer to the actual data within an accessor.
ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.
0037693
0037694 pack= bf_delhead(pack, ETH_HDR_SIZE);
Strip off the ethernet header before passing the packet off to ip_arrived() or ip_arrived_broadcast().
bf_delhead()
If only the beginning of a linked list can be freed, bf_delhead() is called. If acc_linkC and buf_linkC are one for all of the relevant accessors and their associated buffers in the linked list, the operation is straight-forward:
bf_delhead() is often called to remove the header (e.g., ip header) from a packet.
For a detailed description of the network service's buffer management, click here.
0037695
0037696 if (broadcast)
0037697 ip_arrived_broadcast(ip_port, pack);
ip_arrived_broadcast()
If a packet arrives on an ethernet interface, ip_arrived_broadcast() is called from ip_eth_arrived() (instead of ip_arrived()) if the arriving ethernet packet has the broadcast ethernet address (i.e., ff:ff:ff:ff:ff:ff). ip_arrived_broadcast() performs some checks that include verifying that the destination ip address (in addition to the destination ethernet address) is the broadcast address.
0037698 else
0037699 ip_arrived(ip_port, pack);
ip_arrived()
Depending on the destination ip address of its second parameter,
ip_arrived(ip_port, pack) does one of several things:
1) If the destination ip address is the ip address of the ip port associated with the ethernet port, ip_arrived() calls ip_port_arrive() for the packet.
2) If the destination ip address is the ip address of another ip port, ip_arrived() also calls ip_port_arrive(). This time, however, the first argument passed to ip_port_arrive() is the other port. Note that for this to take place, an input route to the other ip port must exist.
3) If the destination ip address is not the address of another ip port but it is in the same network as another ip port, ip_arrived() sends the packet out the other interface. Again, an input route to the other ip port for this destination must exist.
4) If the destination ip address is not the address of another ip port and it is not in the same network as another ip port, ip_arrived() sends the packet out to the gateway for this network. Again, an input route (including the gateway) to the other ip port for this destination must exist.
5) If the destination ip address is not the ip address of the ip port but an input route for the destination exists and is associated with the same ip port as the packet arrived, an icmp redirect message is sent to the source (provided the source is on the same network) and the packet is then sent. If the source of the ip packet is not on the same network as the ip port, the packet is dropped.
If an ip packet arrives on an ethernet interface, ip_eth_arrived() strips off a packet's ethernet header before handing the packet off to ip_arrived().
0037700 }
0037701
0037702 /*
0037703 * $PchId: ip_eth.c,v 1.9 1996/12/17 07:55:21 philip Exp $
0037704 */