Crossbow - transition to Mercurial
6752301 tx hang can occur under heavy load on nxge
6715651 nxge internal tx load balancing needs to go away

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 


  26 #include <sys/nxge/nxge_impl.h>
  27 #include <sys/nxge/nxge_hio.h>
  28 #include <npi_tx_wr64.h>
  29 
  30 /* Software LSO required header files */
  31 #include <netinet/tcp.h>
  32 #include <inet/ip_impl.h>
  33 #include <inet/tcp.h>
  34 
  35 static mblk_t *nxge_lso_eliminate(mblk_t *);
  36 static mblk_t *nxge_do_softlso(mblk_t *mp, uint32_t mss);
  37 static void nxge_lso_info_get(mblk_t *, uint32_t *, uint32_t *);
  38 static void nxge_hcksum_retrieve(mblk_t *,
  39     uint32_t *, uint32_t *, uint32_t *,
  40     uint32_t *, uint32_t *);
  41 static uint32_t nxge_csgen(uint16_t *, int);
  42 
  43 extern uint32_t         nxge_reclaim_pending;
  44 extern uint32_t         nxge_bcopy_thresh;
  45 extern uint32_t         nxge_dvma_thresh;
  46 extern uint32_t         nxge_dma_stream_thresh;
  47 extern uint32_t         nxge_tx_minfree;
  48 extern uint32_t         nxge_tx_intr_thres;
  49 extern uint32_t         nxge_tx_max_gathers;
  50 extern uint32_t         nxge_tx_tiny_pack;
  51 extern uint32_t         nxge_tx_use_bcopy;


  52 extern nxge_tx_mode_t   nxge_tx_scheme;
  53 uint32_t                nxge_lso_kick_cnt = 2;
  54 





  55 
  56 void
  57 nxge_tx_ring_task(void *arg)
  58 {
  59         p_tx_ring_t     ring = (p_tx_ring_t)arg;
  60 
  61         MUTEX_ENTER(&ring->lock);
  62         (void) nxge_txdma_reclaim(ring->nxgep, ring, 0);
  63         MUTEX_EXIT(&ring->lock);
  64 
  65         if (!isLDOMguest(ring->nxgep) && !ring->tx_ring_offline)
  66                 mac_tx_ring_update(ring->nxgep->mach, ring->tx_ring_handle);
  67 #if defined(sun4v)
  68         else {
  69                 nxge_hio_data_t *nhd =
  70                     (nxge_hio_data_t *)ring->nxgep->nxge_hw_p->hio;
  71                 nx_vio_fp_t *vio = &nhd->hio.vio;
  72 
  73                 /* Call back vnet. */
  74                 if (vio->cb.vio_net_tx_update) {
  75                         (*vio->cb.vio_net_tx_update)(ring->nxgep->hio_vr->vhp);
  76                 }
  77         }
  78 #endif
  79 }
  80 
  81 static void
  82 nxge_tx_ring_dispatch(p_tx_ring_t ring)
  83 {
  84         /*
  85          * Kick the ring task to reclaim some buffers.
  86          */
  87         (void) ddi_taskq_dispatch(ring->taskq,
  88             nxge_tx_ring_task, (void *)ring, DDI_SLEEP);
  89 }
  90 
  91 mblk_t *
  92 nxge_tx_ring_send(void *arg, mblk_t *mp)
  93 {
  94         p_nxge_ring_handle_t    nrhp = (p_nxge_ring_handle_t)arg;
  95         p_nxge_t                nxgep;
  96         p_tx_ring_t             tx_ring_p;
  97         int                     status, channel;
  98 
  99         ASSERT(nrhp != NULL);
 100         nxgep = nrhp->nxgep;
 101         channel = nxgep->pt_config.hw_config.tdc.start + nrhp->index;
 102         tx_ring_p = nxgep->tx_rings->rings[channel];
 103 
 104         ASSERT(nxgep == tx_ring_p->nxgep);
 105 
 106 #ifdef DEBUG
 107         if (isLDOMservice(nxgep)) {
 108                 ASSERT(!tx_ring_p->tx_ring_offline);
 109         }
 110 #endif
 111 
 112         status = nxge_start(nxgep, tx_ring_p, mp);
 113         if (status) {
 114                 nxge_tx_ring_dispatch(tx_ring_p);
 115                 return (mp);
 116         }
 117 
 118         return ((mblk_t *)NULL);
 119 }
 120 
 121 int
 122 nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
 123 {
 124         int                     status = 0;
 125         p_tx_desc_t             tx_desc_ring_vp;
 126         npi_handle_t            npi_desc_handle;
 127         nxge_os_dma_handle_t    tx_desc_dma_handle;
 128         p_tx_desc_t             tx_desc_p;
 129         p_tx_msg_t              tx_msg_ring;
 130         p_tx_msg_t              tx_msg_p;
 131         tx_desc_t               tx_desc, *tmp_desc_p;
 132         tx_desc_t               sop_tx_desc, *sop_tx_desc_p;
 133         p_tx_pkt_header_t       hdrp;
 134         tx_pkt_header_t         tmp_hdrp;
 135         p_tx_pkt_hdr_all_t      pkthdrp;
 136         uint8_t                 npads = 0;
 137         uint64_t                dma_ioaddr;
 138         uint32_t                dma_flags;
 139         int                     last_bidx;
 140         uint8_t                 *b_rptr;
 141         caddr_t                 kaddr;
 142         uint32_t                nmblks;
 143         uint32_t                ngathers;
 144         uint32_t                clen;
 145         int                     len;
 146         uint32_t                pkt_len, pack_len, min_len;
 147         uint32_t                bcopy_thresh;
 148         int                     i, cur_index, sop_index;
 149         uint16_t                tail_index;
 150         boolean_t               tail_wrap = B_FALSE;
 151         nxge_dma_common_t       desc_area;
 152         nxge_os_dma_handle_t    dma_handle;
 153         ddi_dma_cookie_t        dma_cookie;
 154         npi_handle_t            npi_handle;
 155         p_mblk_t                nmp;
 156         p_mblk_t                t_mp;
 157         uint32_t                ncookies;
 158         boolean_t               good_packet;
 159         boolean_t               mark_mode = B_FALSE;
 160         p_nxge_stats_t          statsp;
 161         p_nxge_tx_ring_stats_t tdc_stats;
 162         t_uscalar_t             start_offset = 0;
 163         t_uscalar_t             stuff_offset = 0;
 164         t_uscalar_t             end_offset = 0;
 165         t_uscalar_t             value = 0;
 166         t_uscalar_t             cksum_flags = 0;
 167         boolean_t               cksum_on = B_FALSE;
 168         uint32_t                boff = 0;
 169         uint64_t                tot_xfer_len = 0;
 170         boolean_t               header_set = B_FALSE;
 171 #ifdef NXGE_DEBUG
 172         p_tx_desc_t             tx_desc_ring_pp;
 173         p_tx_desc_t             tx_desc_pp;
 174         tx_desc_t               *save_desc_p;
 175         int                     dump_len;
 176         int                     sad_len;
 177         uint64_t                sad;
 178         int                     xfer_len;
 179         uint32_t                msgsize;
 180 #endif
 181         p_mblk_t                mp_chain = NULL;
 182         boolean_t               is_lso = B_FALSE;
 183         boolean_t               lso_again;
 184         int                     cur_index_lso;
 185         p_mblk_t                nmp_lso_save;
 186         uint32_t                lso_ngathers;
 187         boolean_t               lso_tail_wrap = B_FALSE;
 188 
 189         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 190             "==> nxge_start: tx dma channel %d", tx_ring_p->tdc));
 191         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 192             "==> nxge_start: Starting tdc %d desc pending %d",
 193             tx_ring_p->tdc, tx_ring_p->descs_pending));
 194 
 195         statsp = nxgep->statsp;
 196 
 197         if (!isLDOMguest(nxgep)) {
 198                 switch (nxgep->mac.portmode) {
 199                 default:
 200                         if (nxgep->statsp->port_stats.lb_mode ==
 201                             nxge_lb_normal) {
 202                                 if (!statsp->mac_stats.link_up) {
 203                                         freemsg(mp);
 204                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 205                                             "==> nxge_start: "
 206                                             "link not up"));
 207                                         goto nxge_start_fail1;
 208                                 }
 209                         }
 210                         break;
 211                 case PORT_10G_FIBER:
 212                         /*
 213                          * For the following modes, check the link status
 214                          * before sending the packet out:
 215                          * nxge_lb_normal, nxge_lb_ext10g, nxge_lb_phy10g
 216                          */
 217                         if (nxgep->statsp->port_stats.lb_mode <
 218                             nxge_lb_serdes10g) {
 219                                 if (!statsp->mac_stats.link_up) {
 220                                         freemsg(mp);
 221                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 222                                             "==> nxge_start: "
 223                                             "link not up"));
 224                                         goto nxge_start_fail1;
 225                                 }
 226                         }
 227                         break;
 228                 }
 229         }
 230 
 231         if ((!(nxgep->drv_state & STATE_HW_INITIALIZED)) ||
 232             (nxgep->nxge_mac_state != NXGE_MAC_STARTED)) {
 233                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 234                     "==> nxge_start: hardware not initialized or stopped"));
 235                 freemsg(mp);
 236                 goto nxge_start_fail1;
 237         }
 238 
 239         if (nxgep->soft_lso_enable) {
 240                 mp_chain = nxge_lso_eliminate(mp);
 241                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 242                     "==> nxge_start(0): LSO mp $%p mp_chain $%p",
 243                     mp, mp_chain));
 244                 if (mp_chain == NULL) {
 245                         NXGE_ERROR_MSG((nxgep, TX_CTL,
 246                             "==> nxge_send(0): NULL mp_chain $%p != mp $%p",
 247                             mp_chain, mp));
 248                         goto nxge_start_fail1;
 249                 }
 250                 if (mp_chain != mp) {
 251                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 252                             "==> nxge_send(1): IS LSO mp_chain $%p != mp $%p",
 253                             mp_chain, mp));
 254                         is_lso = B_TRUE;
 255                         mp = mp_chain;
 256                         mp_chain = mp_chain->b_next;
 257                         mp->b_next = NULL;
 258                 }
 259         }
 260 
 261         hcksum_retrieve(mp, NULL, NULL, &start_offset,
 262             &stuff_offset, &end_offset, &value, &cksum_flags);
 263         if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) {
 264                 start_offset += sizeof (ether_header_t);
 265                 stuff_offset += sizeof (ether_header_t);
 266         } else {
 267                 start_offset += sizeof (struct ether_vlan_header);
 268                 stuff_offset += sizeof (struct ether_vlan_header);
 269         }
 270 
 271         if (cksum_flags & HCK_PARTIALCKSUM) {
 272                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 273                     "==> nxge_start: mp $%p len %d "
 274                     "cksum_flags 0x%x (partial checksum) ",
 275                     mp, MBLKL(mp), cksum_flags));
 276                 cksum_on = B_TRUE;
 277         }
 278 
 279         pkthdrp = (p_tx_pkt_hdr_all_t)&tmp_hdrp;
 280         pkthdrp->reserved = 0;
 281         tmp_hdrp.value = 0;
 282         nxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
 283             0, 0, pkthdrp,
 284             start_offset, stuff_offset);
 285 
 286         lso_again = B_FALSE;
 287         lso_ngathers = 0;
 288 
 289         MUTEX_ENTER(&tx_ring_p->lock);
 290 
 291         if (isLDOMservice(nxgep)) {
 292                 tx_ring_p->tx_ring_busy = B_TRUE;
 293                 if (tx_ring_p->tx_ring_offline) {
 294                         freemsg(mp);
 295                         tx_ring_p->tx_ring_busy = B_FALSE;
 296                         (void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
 297                             NXGE_TX_RING_OFFLINED);
 298                         MUTEX_EXIT(&tx_ring_p->lock);
 299                         return (status);
 300                 }
 301         }
 302 
 303         cur_index_lso = tx_ring_p->wr_index;
 304         lso_tail_wrap = tx_ring_p->wr_index_wrap;
 305 start_again:
 306         ngathers = 0;
 307         sop_index = tx_ring_p->wr_index;
 308 #ifdef  NXGE_DEBUG
 309         if (tx_ring_p->descs_pending) {
 310                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
 311                     "desc pending %d ", tx_ring_p->descs_pending));
 312         }
 313 
 314         dump_len = (int)(MBLKL(mp));
 315         dump_len = (dump_len > 128) ? 128: dump_len;
 316 
 317         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 318             "==> nxge_start: tdc %d: dumping ...: b_rptr $%p "
 319             "(Before header reserve: ORIGINAL LEN %d)",
 320             tx_ring_p->tdc,
 321             mp->b_rptr,
 322             dump_len));
 323 
 324         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: dump packets "
 325             "(IP ORIGINAL b_rptr $%p): %s", mp->b_rptr,
 326             nxge_dump_packet((char *)mp->b_rptr, dump_len)));
 327 #endif
 328 
 329         tdc_stats = tx_ring_p->tdc_stats;
 330         mark_mode = (tx_ring_p->descs_pending &&
 331             ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending)
 332             < nxge_tx_minfree));
 333 
 334         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 335             "TX Descriptor ring is channel %d mark mode %d",
 336             tx_ring_p->tdc, mark_mode));
 337 
 338         if ((tx_ring_p->descs_pending + lso_ngathers) >= nxge_reclaim_pending) {
 339                 if (!nxge_txdma_reclaim(nxgep, tx_ring_p,
 340                     (nxge_tx_minfree + lso_ngathers))) {
 341                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 342                             "TX Descriptor ring is full: channel %d",
 343                             tx_ring_p->tdc));
 344                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 345                             "TX Descriptor ring is full: channel %d",
 346                             tx_ring_p->tdc));
 347                         if (is_lso) {
 348                                 /*
 349                                  * free the current mp and mp_chain if not FULL.
 350                                  */
 351                                 tdc_stats->tx_no_desc++;
 352                                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 353                                     "LSO packet: TX Descriptor ring is full: "
 354                                     "channel %d",
 355                                     tx_ring_p->tdc));
 356                                 goto nxge_start_fail_lso;
 357                         } else {


 358                                 cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
 359                                 tdc_stats->tx_no_desc++;
 360 
 361                                 if (isLDOMservice(nxgep)) {
 362                                         tx_ring_p->tx_ring_busy = B_FALSE;
 363                                         if (tx_ring_p->tx_ring_offline) {
 364                                                 (void) atomic_swap_32(
 365                                                     &tx_ring_p->tx_ring_offline,
 366                                                     NXGE_TX_RING_OFFLINED);

 367                                         }
 368                                 }
 369 
 370                                 MUTEX_EXIT(&tx_ring_p->lock);





 371                                 status = 1;
 372                                 goto nxge_start_fail1;
 373                         }
 374                 }
 375         }
 376 
 377         nmp = mp;
 378         i = sop_index = tx_ring_p->wr_index;
 379         nmblks = 0;
 380         ngathers = 0;
 381         pkt_len = 0;
 382         pack_len = 0;
 383         clen = 0;
 384         last_bidx = -1;
 385         good_packet = B_TRUE;
 386 
 387         desc_area = tx_ring_p->tdc_desc;
 388         npi_handle = desc_area.npi_handle;
 389         npi_desc_handle.regh = (nxge_os_acc_handle_t)
 390             DMA_COMMON_ACC_HANDLE(desc_area);
 391         tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
 392         tx_desc_dma_handle = (nxge_os_dma_handle_t)
 393             DMA_COMMON_HANDLE(desc_area);
 394         tx_msg_ring = tx_ring_p->tx_msg_ring;
 395 
 396         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: wr_index %d i %d",
 397             sop_index, i));
 398 
 399 #ifdef  NXGE_DEBUG
 400         msgsize = msgdsize(nmp);
 401         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 402             "==> nxge_start(1): wr_index %d i %d msgdsize %d",
 403             sop_index, i, msgsize));
 404 #endif
 405         /*
 406          * The first 16 bytes of the premapped buffer are reserved
 407          * for header. No padding will be used.
 408          */
 409         pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
 410         if (nxge_tx_use_bcopy && (nxgep->niu_type != N2_NIU)) {
 411                 bcopy_thresh = (nxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
 412         } else {
 413                 bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
 414         }
 415         while (nmp) {
 416                 good_packet = B_TRUE;
 417                 b_rptr = nmp->b_rptr;
 418                 len = MBLKL(nmp);
 419                 if (len <= 0) {
 420                         nmp = nmp->b_cont;
 421                         continue;
 422                 }
 423                 nmblks++;
 424 
 425                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(1): nmblks %d "
 426                     "len %d pkt_len %d pack_len %d",
 427                     nmblks, len, pkt_len, pack_len));
 428                 /*
 429                  * Hardware limits the transfer length to 4K for NIU and
 430                  * 4076 (TX_MAX_TRANSFER_LENGTH) for Neptune. But we just
 431                  * use TX_MAX_TRANSFER_LENGTH as the limit for both.
 432                  * If len is longer than the limit, then we break nmp into
 433                  * two chunks: Make the first chunk equal to the limit and
 434                  * the second chunk for the remaining data. If the second
 435                  * chunk is still larger than the limit, then it will be
 436                  * broken into two in the next pass.
 437                  */
 438                 if (len > TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE) {
 439                         if ((t_mp = dupb(nmp)) != NULL) {
 440                                 nmp->b_wptr = nmp->b_rptr +
 441                                     (TX_MAX_TRANSFER_LENGTH
 442                                     - TX_PKT_HEADER_SIZE);
 443                                 t_mp->b_rptr = nmp->b_wptr;
 444                                 t_mp->b_cont = nmp->b_cont;
 445                                 nmp->b_cont = t_mp;
 446                                 len = MBLKL(nmp);
 447                         } else {
 448                                 if (is_lso) {
 449                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 450                                             "LSO packet: dupb failed: "
 451                                             "channel %d",
 452                                             tx_ring_p->tdc));
 453                                         mp = nmp;
 454                                         goto nxge_start_fail_lso;
 455                                 } else {
 456                                         good_packet = B_FALSE;
 457                                         goto nxge_start_fail2;
 458                                 }
 459                         }
 460                 }
 461                 tx_desc.value = 0;
 462                 tx_desc_p = &tx_desc_ring_vp[i];
 463 #ifdef  NXGE_DEBUG
 464                 tx_desc_pp = &tx_desc_ring_pp[i];
 465 #endif
 466                 tx_msg_p = &tx_msg_ring[i];
 467 #if defined(__i386)
 468                 npi_desc_handle.regp = (uint32_t)tx_desc_p;
 469 #else
 470                 npi_desc_handle.regp = (uint64_t)tx_desc_p;
 471 #endif
 472                 if (!header_set &&
 473                     ((!nxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
 474                     (len >= bcopy_thresh))) {
 475                         header_set = B_TRUE;
 476                         bcopy_thresh += TX_PKT_HEADER_SIZE;
 477                         boff = 0;
 478                         pack_len = 0;
 479                         kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
 480                         hdrp = (p_tx_pkt_header_t)kaddr;
 481                         clen = pkt_len;
 482                         dma_handle = tx_msg_p->buf_dma_handle;
 483                         dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
 484                         (void) ddi_dma_sync(dma_handle,
 485                             i * nxge_bcopy_thresh, nxge_bcopy_thresh,
 486                             DDI_DMA_SYNC_FORDEV);
 487 
 488                         tx_msg_p->flags.dma_type = USE_BCOPY;
 489                         goto nxge_start_control_header_only;
 490                 }
 491 
 492                 pkt_len += len;
 493                 pack_len += len;
 494 
 495                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(3): "
 496                     "desc entry %d "
 497                     "DESC IOADDR $%p "
 498                     "desc_vp $%p tx_desc_p $%p "
 499                     "desc_pp $%p tx_desc_pp $%p "
 500                     "len %d pkt_len %d pack_len %d",
 501                     i,
 502                     DMA_COMMON_IOADDR(desc_area),
 503                     tx_desc_ring_vp, tx_desc_p,
 504                     tx_desc_ring_pp, tx_desc_pp,
 505                     len, pkt_len, pack_len));
 506 
 507                 if (len < bcopy_thresh) {
 508                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(4): "
 509                             "USE BCOPY: "));
 510                         if (nxge_tx_tiny_pack) {
 511                                 uint32_t blst =
 512                                     TXDMA_DESC_NEXT_INDEX(i, -1,
 513                                     tx_ring_p->tx_wrap_mask);
 514                                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 515                                     "==> nxge_start(5): pack"));
 516                                 if ((pack_len <= bcopy_thresh) &&
 517                                     (last_bidx == blst)) {
 518                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 519                                             "==> nxge_start: pack(6) "
 520                                             "(pkt_len %d pack_len %d)",
 521                                             pkt_len, pack_len));
 522                                         i = blst;
 523                                         tx_desc_p = &tx_desc_ring_vp[i];
 524 #ifdef  NXGE_DEBUG
 525                                         tx_desc_pp = &tx_desc_ring_pp[i];
 526 #endif
 527                                         tx_msg_p = &tx_msg_ring[i];
 528                                         boff = pack_len - len;
 529                                         ngathers--;
 530                                 } else if (pack_len > bcopy_thresh &&
 531                                     header_set) {
 532                                         pack_len = len;
 533                                         boff = 0;
 534                                         bcopy_thresh = nxge_bcopy_thresh;
 535                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 536                                             "==> nxge_start(7): > max NEW "
 537                                             "bcopy thresh %d "
 538                                             "pkt_len %d pack_len %d(next)",
 539                                             bcopy_thresh,
 540                                             pkt_len, pack_len));
 541                                 }
 542                                 last_bidx = i;
 543                         }
 544                         kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
 545                         if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
 546                                 hdrp = (p_tx_pkt_header_t)kaddr;
 547                                 header_set = B_TRUE;
 548                                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 549                                     "==> nxge_start(7_x2): "
 550                                     "pkt_len %d pack_len %d (new hdrp $%p)",
 551                                     pkt_len, pack_len, hdrp));
 552                         }
 553                         tx_msg_p->flags.dma_type = USE_BCOPY;
 554                         kaddr += boff;
 555                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(8): "
 556                             "USE BCOPY: before bcopy "
 557                             "DESC IOADDR $%p entry %d "
 558                             "bcopy packets %d "
 559                             "bcopy kaddr $%p "
 560                             "bcopy ioaddr (SAD) $%p "
 561                             "bcopy clen %d "
 562                             "bcopy boff %d",
 563                             DMA_COMMON_IOADDR(desc_area), i,
 564                             tdc_stats->tx_hdr_pkts,
 565                             kaddr,
 566                             dma_ioaddr,
 567                             clen,
 568                             boff));
 569                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
 570                             "1USE BCOPY: "));
 571                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
 572                             "2USE BCOPY: "));
 573                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
 574                             "last USE BCOPY: copy from b_rptr $%p "
 575                             "to KADDR $%p (len %d offset %d",
 576                             b_rptr, kaddr, len, boff));
 577 
 578                         bcopy(b_rptr, kaddr, len);
 579 
 580 #ifdef  NXGE_DEBUG
 581                         dump_len = (len > 128) ? 128: len;
 582                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 583                             "==> nxge_start: dump packets "
 584                             "(After BCOPY len %d)"
 585                             "(b_rptr $%p): %s", len, nmp->b_rptr,
 586                             nxge_dump_packet((char *)nmp->b_rptr,
 587                             dump_len)));
 588 #endif
 589 
 590                         dma_handle = tx_msg_p->buf_dma_handle;
 591                         dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
 592                         (void) ddi_dma_sync(dma_handle,
 593                             i * nxge_bcopy_thresh, nxge_bcopy_thresh,
 594                             DDI_DMA_SYNC_FORDEV);
 595                         clen = len + boff;
 596                         tdc_stats->tx_hdr_pkts++;
 597                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(9): "
 598                             "USE BCOPY: "
 599                             "DESC IOADDR $%p entry %d "
 600                             "bcopy packets %d "
 601                             "bcopy kaddr $%p "
 602                             "bcopy ioaddr (SAD) $%p "
 603                             "bcopy clen %d "
 604                             "bcopy boff %d",
 605                             DMA_COMMON_IOADDR(desc_area),
 606                             i,
 607                             tdc_stats->tx_hdr_pkts,
 608                             kaddr,
 609                             dma_ioaddr,
 610                             clen,
 611                             boff));
 612                 } else {
 613                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(12): "
 614                             "USE DVMA: len %d", len));
 615                         tx_msg_p->flags.dma_type = USE_DMA;
 616                         dma_flags = DDI_DMA_WRITE;
 617                         if (len < nxge_dma_stream_thresh) {
 618                                 dma_flags |= DDI_DMA_CONSISTENT;
 619                         } else {
 620                                 dma_flags |= DDI_DMA_STREAMING;
 621                         }
 622 
 623                         dma_handle = tx_msg_p->dma_handle;
 624                         status = ddi_dma_addr_bind_handle(dma_handle, NULL,
 625                             (caddr_t)b_rptr, len, dma_flags,
 626                             DDI_DMA_DONTWAIT, NULL,
 627                             &dma_cookie, &ncookies);
 628                         if (status == DDI_DMA_MAPPED) {
 629                                 dma_ioaddr = dma_cookie.dmac_laddress;
 630                                 len = (int)dma_cookie.dmac_size;
 631                                 clen = (uint32_t)dma_cookie.dmac_size;
 632                                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 633                                     "==> nxge_start(12_1): "
 634                                     "USE DVMA: len %d clen %d "
 635                                     "ngathers %d",
 636                                     len, clen,
 637                                     ngathers));
 638 #if defined(__i386)
 639                                 npi_desc_handle.regp = (uint32_t)tx_desc_p;
 640 #else
 641                                 npi_desc_handle.regp = (uint64_t)tx_desc_p;
 642 #endif
 643                                 while (ncookies > 1) {
 644                                         ngathers++;
 645                                         /*
 646                                          * this is the fix for multiple
 647                                          * cookies, which are basically
 648                                          * a descriptor entry, we don't set
 649                                          * SOP bit as well as related fields
 650                                          */
 651 
 652                                         (void) npi_txdma_desc_gather_set(
 653                                             npi_desc_handle,
 654                                             &tx_desc,
 655                                             (ngathers -1),
 656                                             mark_mode,
 657                                             ngathers,
 658                                             dma_ioaddr,
 659                                             clen);
 660 
 661                                         tx_msg_p->tx_msg_size = clen;
 662                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 663                                             "==> nxge_start:  DMA "
 664                                             "ncookie %d "
 665                                             "ngathers %d "
 666                                             "dma_ioaddr $%p len %d"
 667                                             "desc $%p descp $%p (%d)",
 668                                             ncookies,
 669                                             ngathers,
 670                                             dma_ioaddr, clen,
 671                                             *tx_desc_p, tx_desc_p, i));
 672 
 673                                         ddi_dma_nextcookie(dma_handle,
 674                                             &dma_cookie);
 675                                         dma_ioaddr =
 676                                             dma_cookie.dmac_laddress;
 677 
 678                                         len = (int)dma_cookie.dmac_size;
 679                                         clen = (uint32_t)dma_cookie.dmac_size;
 680                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 681                                             "==> nxge_start(12_2): "
 682                                             "USE DVMA: len %d clen %d ",
 683                                             len, clen));
 684 
 685                                         i = TXDMA_DESC_NEXT_INDEX(i, 1,
 686                                             tx_ring_p->tx_wrap_mask);
 687                                         tx_desc_p = &tx_desc_ring_vp[i];
 688 
 689 #if defined(__i386)
 690                                         npi_desc_handle.regp =
 691                                             (uint32_t)tx_desc_p;
 692 #else
 693                                         npi_desc_handle.regp =
 694                                             (uint64_t)tx_desc_p;
 695 #endif
 696                                         tx_msg_p = &tx_msg_ring[i];
 697                                         tx_msg_p->flags.dma_type = USE_NONE;
 698                                         tx_desc.value = 0;
 699 
 700                                         ncookies--;
 701                                 }
 702                                 tdc_stats->tx_ddi_pkts++;
 703                                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start:"
 704                                     "DMA: ddi packets %d",
 705                                     tdc_stats->tx_ddi_pkts));
 706                         } else {
 707                                 NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
 708                                     "dma mapping failed for %d "
 709                                     "bytes addr $%p flags %x (%d)",
 710                                     len, b_rptr, status, status));
 711                                 good_packet = B_FALSE;
 712                                 tdc_stats->tx_dma_bind_fail++;
 713                                 tx_msg_p->flags.dma_type = USE_NONE;
 714                                 if (is_lso) {
 715                                         mp = nmp;
 716                                         goto nxge_start_fail_lso;
 717                                 } else {
 718                                         goto nxge_start_fail2;
 719                                 }
 720                         }
 721                 } /* ddi dvma */
 722 
 723                 if (is_lso) {
 724                         nmp_lso_save = nmp;
 725                 }
 726                 nmp = nmp->b_cont;
 727 nxge_start_control_header_only:
 728 #if defined(__i386)
 729                 npi_desc_handle.regp = (uint32_t)tx_desc_p;
 730 #else
 731                 npi_desc_handle.regp = (uint64_t)tx_desc_p;
 732 #endif
 733                 ngathers++;
 734 
 735                 if (ngathers == 1) {
 736 #ifdef  NXGE_DEBUG
 737                         save_desc_p = &sop_tx_desc;
 738 #endif
 739                         sop_tx_desc_p = &sop_tx_desc;
 740                         sop_tx_desc_p->value = 0;
 741                         sop_tx_desc_p->bits.hdw.tr_len = clen;
 742                         sop_tx_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
 743                         sop_tx_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
 744                 } else {
 745 #ifdef  NXGE_DEBUG
 746                         save_desc_p = &tx_desc;
 747 #endif
 748                         tmp_desc_p = &tx_desc;
 749                         tmp_desc_p->value = 0;
 750                         tmp_desc_p->bits.hdw.tr_len = clen;
 751                         tmp_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
 752                         tmp_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
 753 
 754                         tx_desc_p->value = tmp_desc_p->value;
 755                 }
 756 
 757                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(13): "
 758                     "Desc_entry %d ngathers %d "
 759                     "desc_vp $%p tx_desc_p $%p "
 760                     "len %d clen %d pkt_len %d pack_len %d nmblks %d "
 761                     "dma_ioaddr (SAD) $%p mark %d",
 762                     i, ngathers,
 763                     tx_desc_ring_vp, tx_desc_p,
 764                     len, clen, pkt_len, pack_len, nmblks,
 765                     dma_ioaddr, mark_mode));
 766 
 767 #ifdef NXGE_DEBUG
 768                 npi_desc_handle.nxgep = nxgep;
 769                 npi_desc_handle.function.function = nxgep->function_num;
 770                 npi_desc_handle.function.instance = nxgep->instance;
 771                 sad = (save_desc_p->value & TX_PKT_DESC_SAD_MASK);
 772                 xfer_len = ((save_desc_p->value & TX_PKT_DESC_TR_LEN_MASK) >>
 773                     TX_PKT_DESC_TR_LEN_SHIFT);
 774 
 775 
 776                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
 777                     "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
 778                     "mark %d sop %d\n",
 779                     save_desc_p->value,
 780                     sad,
 781                     save_desc_p->bits.hdw.tr_len,
 782                     xfer_len,
 783                     save_desc_p->bits.hdw.num_ptr,
 784                     save_desc_p->bits.hdw.mark,
 785                     save_desc_p->bits.hdw.sop));
 786 
 787                 npi_txdma_dump_desc_one(npi_desc_handle, NULL, i);
 788 #endif
 789 
 790                 tx_msg_p->tx_msg_size = clen;
 791                 i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
 792                 if (ngathers > nxge_tx_max_gathers) {
 793                         good_packet = B_FALSE;
 794                         hcksum_retrieve(mp, NULL, NULL, &start_offset,
 795                             &stuff_offset, &end_offset, &value,
 796                             &cksum_flags);
 797 
 798                         NXGE_DEBUG_MSG((NULL, TX_CTL,
 799                             "==> nxge_start(14): pull msg - "
 800                             "len %d pkt_len %d ngathers %d",
 801                             len, pkt_len, ngathers));
 802                         /* Pull all message blocks from b_cont */
 803                         if (is_lso) {
 804                                 mp = nmp_lso_save;
 805                                 goto nxge_start_fail_lso;
 806                         }
 807                         if ((msgpullup(mp, -1)) == NULL) {
 808                                 goto nxge_start_fail2;
 809                         }
 810                         goto nxge_start_fail2;
 811                 }
 812         } /* while (nmp) */
 813 
 814         tx_msg_p->tx_message = mp;
 815         tx_desc_p = &tx_desc_ring_vp[sop_index];
 816 #if defined(__i386)
 817         npi_desc_handle.regp = (uint32_t)tx_desc_p;
 818 #else
 819         npi_desc_handle.regp = (uint64_t)tx_desc_p;
 820 #endif
 821 
 822         pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
 823         pkthdrp->reserved = 0;
 824         hdrp->value = 0;
 825         bcopy(&tmp_hdrp, hdrp, sizeof (tx_pkt_header_t));
 826 
 827         if (pkt_len > NXGE_MTU_DEFAULT_MAX) {
 828                 tdc_stats->tx_jumbo_pkts++;
 829         }
 830 
 831         min_len = (ETHERMIN + TX_PKT_HEADER_SIZE + (npads * 2));
 832         if (pkt_len < min_len) {
 833                 /* Assume we use bcopy to premapped buffers */
 834                 kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
 835                 NXGE_DEBUG_MSG((NULL, TX_CTL,
 836                     "==> nxge_start(14-1): < (msg_min + 16)"
 837                     "len %d pkt_len %d min_len %d bzero %d ngathers %d",
 838                     len, pkt_len, min_len, (min_len - pkt_len), ngathers));
 839                 bzero((kaddr + pkt_len), (min_len - pkt_len));
 840                 pkt_len = tx_msg_p->tx_msg_size = min_len;
 841 
 842                 sop_tx_desc_p->bits.hdw.tr_len = min_len;
 843 
 844                 NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
 845                 tx_desc_p->value = sop_tx_desc_p->value;
 846 
 847                 NXGE_DEBUG_MSG((NULL, TX_CTL,
 848                     "==> nxge_start(14-2): < msg_min - "
 849                     "len %d pkt_len %d min_len %d ngathers %d",
 850                     len, pkt_len, min_len, ngathers));
 851         }
 852 
 853         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: cksum_flags 0x%x ",
 854             cksum_flags));
 855         {
 856                 uint64_t        tmp_len;
 857 
 858                 /* pkt_len already includes 16 + paddings!! */
 859                 /* Update the control header length */
 860                 tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
 861                 tmp_len = hdrp->value |
 862                     (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);
 863 
 864                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
 865                     "==> nxge_start(15_x1): setting SOP "
 866                     "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
 867                     "0x%llx hdrp->value 0x%llx",
 868                     tot_xfer_len, tot_xfer_len, pkt_len,
 869                     tmp_len, hdrp->value));
 870 #if defined(_BIG_ENDIAN)
 871                 hdrp->value = ddi_swap64(tmp_len);
 872 #else
 873                 hdrp->value = tmp_len;
 874 #endif
 875                 NXGE_DEBUG_MSG((nxgep,
 876                     TX_CTL, "==> nxge_start(15_x2): setting SOP "
 877                     "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
 878                     "tmp_len 0x%llx hdrp->value 0x%llx",
 879                     tot_xfer_len, pkt_len,
 880                     tmp_len, hdrp->value));
 881         }
 882 
 883         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(15): setting SOP "
 884             "wr_index %d "
 885             "tot_xfer_len (%d) pkt_len %d npads %d",
 886             sop_index,
 887             tot_xfer_len, pkt_len,
 888             npads));
 889 
 890         sop_tx_desc_p->bits.hdw.sop = 1;
 891         sop_tx_desc_p->bits.hdw.mark = mark_mode;
 892         sop_tx_desc_p->bits.hdw.num_ptr = ngathers;
 893 
 894         NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
 895 
 896         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(16): set SOP done"));
 897 
 898 #ifdef NXGE_DEBUG
 899         npi_desc_handle.nxgep = nxgep;
 900         npi_desc_handle.function.function = nxgep->function_num;
 901         npi_desc_handle.function.instance = nxgep->instance;
 902 
 903         NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
 904             "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
 905             save_desc_p->value,
 906             sad,
 907             save_desc_p->bits.hdw.tr_len,
 908             xfer_len,
 909             save_desc_p->bits.hdw.num_ptr,
 910             save_desc_p->bits.hdw.mark,
 911             save_desc_p->bits.hdw.sop));
 912         (void) npi_txdma_dump_desc_one(npi_desc_handle, NULL, sop_index);
 913 
 914         dump_len = (pkt_len > 128) ? 128: pkt_len;
 915         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 916             "==> nxge_start: dump packets(17) (after sop set, len "
 917             " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
 918             "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
 919             (char *)hdrp,
 920             nxge_dump_packet((char *)hdrp, dump_len)));
 921         NXGE_DEBUG_MSG((nxgep, TX_CTL,
 922             "==> nxge_start(18): TX desc sync: sop_index %d",
 923             sop_index));
 924 #endif
 925 
 926         if ((ngathers == 1) || tx_ring_p->wr_index < i) {
 927                 (void) ddi_dma_sync(tx_desc_dma_handle,
 928                     sop_index * sizeof (tx_desc_t),
 929                     ngathers * sizeof (tx_desc_t),
 930                     DDI_DMA_SYNC_FORDEV);
 931 
 932                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(19): sync 1 "
 933                     "cs_off = 0x%02X cs_s_off = 0x%02X "
 934                     "pkt_len %d ngathers %d sop_index %d\n",
 935                     stuff_offset, start_offset,
 936                     pkt_len, ngathers, sop_index));
 937         } else { /* more than one descriptor and wrap around */
 938                 uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
 939                 (void) ddi_dma_sync(tx_desc_dma_handle,
 940                     sop_index * sizeof (tx_desc_t),
 941                     nsdescs * sizeof (tx_desc_t),
 942                     DDI_DMA_SYNC_FORDEV);
 943                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(20): sync 1 "
 944                     "cs_off = 0x%02X cs_s_off = 0x%02X "
 945                     "pkt_len %d ngathers %d sop_index %d\n",
 946                     stuff_offset, start_offset,
 947                     pkt_len, ngathers, sop_index));
 948 
 949                 (void) ddi_dma_sync(tx_desc_dma_handle,
 950                     0,
 951                     (ngathers - nsdescs) * sizeof (tx_desc_t),
 952                     DDI_DMA_SYNC_FORDEV);
 953                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(21): sync 2 "
 954                     "cs_off = 0x%02X cs_s_off = 0x%02X "
 955                     "pkt_len %d ngathers %d sop_index %d\n",
 956                     stuff_offset, start_offset,
 957                     pkt_len, ngathers, sop_index));
 958         }
 959 
 960         tail_index = tx_ring_p->wr_index;
 961         tail_wrap = tx_ring_p->wr_index_wrap;
 962 
 963         tx_ring_p->wr_index = i;
 964         if (tx_ring_p->wr_index <= tail_index) {
 965                 tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
 966                     B_FALSE : B_TRUE);
 967         }
 968 
 969         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX kick: "
 970             "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
 971             tx_ring_p->tdc,
 972             tx_ring_p->wr_index,
 973             tx_ring_p->wr_index_wrap,
 974             ngathers,
 975             tx_ring_p->descs_pending));
 976 
 977         if (is_lso) {
 978                 lso_ngathers += ngathers;
 979                 if (mp_chain != NULL) {
 980                         mp = mp_chain;
 981                         mp_chain = mp_chain->b_next;
 982                         mp->b_next = NULL;
 983                         if (nxge_lso_kick_cnt == lso_ngathers) {
 984                                 tx_ring_p->descs_pending += lso_ngathers;
 985                                 {
 986                                         tx_ring_kick_t          kick;
 987 
 988                                         kick.value = 0;
 989                                         kick.bits.ldw.wrap =
 990                                             tx_ring_p->wr_index_wrap;
 991                                         kick.bits.ldw.tail =
 992                                             (uint16_t)tx_ring_p->wr_index;
 993 
 994                                         /* Kick the Transmit kick register */
 995                                         TXDMA_REG_WRITE64(
 996                                             NXGE_DEV_NPI_HANDLE(nxgep),
 997                                             TX_RING_KICK_REG,
 998                                             (uint8_t)tx_ring_p->tdc,
 999                                             kick.value);
1000                                         tdc_stats->tx_starts++;
1001 
1002                                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
1003                                             "==> nxge_start: more LSO: "
1004                                             "LSO_CNT %d",
1005                                             lso_ngathers));
1006                                 }
1007                                 lso_ngathers = 0;
1008                                 ngathers = 0;
1009                                 cur_index_lso = sop_index = tx_ring_p->wr_index;
1010                                 lso_tail_wrap = tx_ring_p->wr_index_wrap;
1011                         }
1012                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
1013                             "==> nxge_start: lso again: "
1014                             "lso_gathers %d ngathers %d cur_index_lso %d "
1015                             "wr_index %d sop_index %d",
1016                             lso_ngathers, ngathers, cur_index_lso,
1017                             tx_ring_p->wr_index, sop_index));
1018 
1019                         NXGE_DEBUG_MSG((nxgep, TX_CTL,
1020                             "==> nxge_start: next : count %d",
1021                             lso_ngathers));
1022                         lso_again = B_TRUE;
1023                         goto start_again;
1024                 }
1025                 ngathers = lso_ngathers;
1026         }
1027 
1028         NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX KICKING: "));
1029 
1030         {
1031                 tx_ring_kick_t          kick;
1032 
1033                 kick.value = 0;
1034                 kick.bits.ldw.wrap = tx_ring_p->wr_index_wrap;
1035                 kick.bits.ldw.tail = (uint16_t)tx_ring_p->wr_index;
1036 
1037                 /* Kick start the Transmit kick register */
1038                 TXDMA_REG_WRITE64(NXGE_DEV_NPI_HANDLE(nxgep),
1039                     TX_RING_KICK_REG,
1040                     (uint8_t)tx_ring_p->tdc,
1041                     kick.value);
1042         }
1043 
1044         tx_ring_p->descs_pending += ngathers;
1045         tdc_stats->tx_starts++;
1046 
1047         if (isLDOMservice(nxgep)) {
1048                 tx_ring_p->tx_ring_busy = B_FALSE;
1049                 if (tx_ring_p->tx_ring_offline) {
1050                         (void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
1051                             NXGE_TX_RING_OFFLINED);
1052                 }
1053         }
1054 
1055         MUTEX_EXIT(&tx_ring_p->lock);
1056 
1057         NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

1058         return (status);
1059 
1060 nxge_start_fail_lso:
1061         status = 0;
1062         good_packet = B_FALSE;
1063         if (mp != NULL) {
1064                 freemsg(mp);
1065         }
1066         if (mp_chain != NULL) {
1067                 freemsg(mp_chain);
1068         }
1069         if (!lso_again && !ngathers) {
1070                 if (isLDOMservice(nxgep)) {
1071                         tx_ring_p->tx_ring_busy = B_FALSE;
1072                         if (tx_ring_p->tx_ring_offline) {
1073                                 (void) atomic_swap_32(
1074                                     &tx_ring_p->tx_ring_offline,
1075                                     NXGE_TX_RING_OFFLINED);
1076                         }
1077                 }
1078 
1079                 MUTEX_EXIT(&tx_ring_p->lock);
1080                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
1081                     "==> nxge_start: lso exit (nothing changed)"));
1082                 goto nxge_start_fail1;
1083         }
1084 
1085         NXGE_DEBUG_MSG((nxgep, TX_CTL,
1086             "==> nxge_start (channel %d): before lso "
1087             "lso_gathers %d ngathers %d cur_index_lso %d "
1088             "wr_index %d sop_index %d lso_again %d",
1089             tx_ring_p->tdc,
1090             lso_ngathers, ngathers, cur_index_lso,
1091             tx_ring_p->wr_index, sop_index, lso_again));
1092 
1093         if (lso_again) {
1094                 lso_ngathers += ngathers;
1095                 ngathers = lso_ngathers;
1096                 sop_index = cur_index_lso;
1097                 tx_ring_p->wr_index = sop_index;
1098                 tx_ring_p->wr_index_wrap = lso_tail_wrap;
1099         }
1100 
1101         NXGE_DEBUG_MSG((nxgep, TX_CTL,
1102             "==> nxge_start (channel %d): after lso "
1103             "lso_gathers %d ngathers %d cur_index_lso %d "
1104             "wr_index %d sop_index %d lso_again %d",
1105             tx_ring_p->tdc,
1106             lso_ngathers, ngathers, cur_index_lso,
1107             tx_ring_p->wr_index, sop_index, lso_again));
1108 
1109 nxge_start_fail2:
1110         if (good_packet == B_FALSE) {
1111                 cur_index = sop_index;
1112                 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: clean up"));
1113                 for (i = 0; i < ngathers; i++) {
1114                         tx_desc_p = &tx_desc_ring_vp[cur_index];
1115 #if defined(__i386)
1116                         npi_handle.regp = (uint32_t)tx_desc_p;
1117 #else
1118                         npi_handle.regp = (uint64_t)tx_desc_p;
1119 #endif
1120                         tx_msg_p = &tx_msg_ring[cur_index];
1121                         (void) npi_txdma_desc_set_zero(npi_handle, 1);
1122                         if (tx_msg_p->flags.dma_type == USE_DVMA) {
1123                                 NXGE_DEBUG_MSG((nxgep, TX_CTL,
1124                                     "tx_desc_p = %X index = %d",
1125                                     tx_desc_p, tx_ring_p->rd_index));
1126                                 (void) dvma_unload(tx_msg_p->dvma_handle,
1127                                     0, -1);
1128                                 tx_msg_p->dvma_handle = NULL;
1129                                 if (tx_ring_p->dvma_wr_index ==
1130                                     tx_ring_p->dvma_wrap_mask)
1131                                         tx_ring_p->dvma_wr_index = 0;
1132                                 else
1133                                         tx_ring_p->dvma_wr_index++;
1134                                 tx_ring_p->dvma_pending--;
1135                         } else if (tx_msg_p->flags.dma_type == USE_DMA) {
1136                                 if (ddi_dma_unbind_handle(
1137                                     tx_msg_p->dma_handle)) {
1138                                         cmn_err(CE_WARN, "!nxge_start: "
1139                                             "ddi_dma_unbind_handle failed");
1140                                 }
1141                         }
1142                         tx_msg_p->flags.dma_type = USE_NONE;
1143                         cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
1144                             tx_ring_p->tx_wrap_mask);
1145 
1146                 }


1147         }
1148 
1149         if (isLDOMservice(nxgep)) {
1150                 tx_ring_p->tx_ring_busy = B_FALSE;
1151                 if (tx_ring_p->tx_ring_offline) {
1152                         (void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
1153                             NXGE_TX_RING_OFFLINED);
1154                 }
1155         }
1156 
1157         MUTEX_EXIT(&tx_ring_p->lock);
1158 
1159 nxge_start_fail1:
1160         /* Add FMA to check the access handle nxge_hregh */
1161 
1162         NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));

1163         return (status);
1164 }
1165 
1166 int
1167 nxge_serial_tx(mblk_t *mp, void *arg)
1168 {
1169         p_tx_ring_t             tx_ring_p = (p_tx_ring_t)arg;
1170         p_nxge_t                nxgep = tx_ring_p->nxgep;
1171         int                     status = 0;
1172 
1173         if (isLDOMservice(nxgep)) {
1174                 if (tx_ring_p->tx_ring_offline) {
1175                         freemsg(mp);
1176                         return (status);
1177                 }
1178         }
1179 
1180         status = nxge_start(nxgep, tx_ring_p, mp);
1181         return (status);
1182 }
1183 
























































1184 /*
1185  * nxge_m_tx() - send a chain of packets
1186  * XXX This routine is needed for a special case where a service domain
1187  * sends packets on behalf of clients with bound shares
1188  * We use the first ring of default tx group.
1189  */
1190 mblk_t *
1191 nxge_m_tx(void *arg, mblk_t *mp)
1192 {
1193         p_nxge_t                nxgep = (p_nxge_t)arg;
1194         mblk_t                  *next;
1195         p_tx_ring_t             *tx_rings;
1196         p_tx_ring_t             tx_ring_p;
1197         nxge_grp_t              *group;
1198 
1199 
1200         NXGE_DEBUG_MSG((nxgep, DDI_CTL, "==> nxge_m_tx"));
1201 
1202         if ((!(nxgep->drv_state & STATE_HW_INITIALIZED)) ||
1203             (nxgep->nxge_mac_state != NXGE_MAC_STARTED)) {
1204                 NXGE_DEBUG_MSG((nxgep, DDI_CTL,
1205                     "==> nxge_m_tx: hardware not initialized"));
1206                 NXGE_DEBUG_MSG((nxgep, DDI_CTL,
1207                     "<== nxge_m_tx"));
1208                 freemsgchain(mp);
1209                 mp = NULL;
1210                 return (mp);
1211         }
1212 
1213         group = nxgep->tx_set.group[0];      /* The default group */


1214 
1215         tx_rings = nxgep->tx_rings->rings;
1216         tx_ring_p = tx_rings[group->legend[0]]; /* First ring */
1217 
1218         while (mp != NULL) {
1219                 next = mp->b_next;
1220                 mp->b_next = NULL;
1221                 if (nxge_start(nxgep, tx_ring_p, mp)) {
1222                         NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: failed "
1223                             "ring index %d", ring_index));





1224                         mp->b_next = next;
1225                         break;
1226                 }

1227                 mp = next;
1228 
1229                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1230                     "==> nxge_m_tx: (go back to loop) mp $%p next $%p",
1231                     mp, next));
1232         }
1233 
1234         NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_m_tx"));
1235         return (mp);
1236 }
1237 





























































































































































1238 /* Software LSO starts here */
1239 static void
1240 nxge_hcksum_retrieve(mblk_t *mp,
1241     uint32_t *start, uint32_t *stuff, uint32_t *end,
1242     uint32_t *value, uint32_t *flags)
1243 {
1244         if (mp->b_datap->db_type == M_DATA) {
1245                 if (flags != NULL) {
1246                         *flags = DB_CKSUMFLAGS(mp) & (HCK_IPV4_HDRCKSUM |
1247                             HCK_PARTIALCKSUM | HCK_FULLCKSUM |
1248                             HCK_FULLCKSUM_OK);
1249                         if ((*flags & (HCK_PARTIALCKSUM |
1250                             HCK_FULLCKSUM)) != 0) {
1251                                 if (value != NULL)
1252                                         *value = (uint32_t)DB_CKSUM16(mp);
1253                                 if ((*flags & HCK_PARTIALCKSUM) != 0) {
1254                                         if (start != NULL)
1255                                                 *start =
1256                                                     (uint32_t)DB_CKSUMSTART(mp);
1257                                         if (stuff != NULL)
1258                                                 *stuff =
1259                                                     (uint32_t)DB_CKSUMSTUFF(mp);
1260                                         if (end != NULL)
1261                                                 *end =
1262                                                     (uint32_t)DB_CKSUMEND(mp);
1263                                 }
1264                         }
1265                 }
1266         }
1267 }
1268 
1269 static void
1270 nxge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1271 {
1272         ASSERT(DB_TYPE(mp) == M_DATA);
1273 
1274         *mss = 0;
1275         if (flags != NULL) {
1276                 *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1277                 if ((*flags != 0) && (mss != NULL)) {
1278                         *mss = (uint32_t)DB_LSOMSS(mp);
1279                 }
1280                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1281                     "==> nxge_lso_info_get(flag !=NULL): mss %d *flags 0x%x",
1282                     *mss, *flags));
1283         }
1284 
1285         NXGE_DEBUG_MSG((NULL, TX_CTL,
1286             "<== nxge_lso_info_get: mss %d", *mss));
1287 }
1288 
1289 /*
1290  * Do Soft LSO on the oversized packet.
1291  *
 1292  * 1. Create a chain of messages for headers.
1293  * 2. Fill up header messages with proper information.
 1294  * 3. Copy Ethernet, IP, and TCP headers from the original message to
 1295  *    each new message with necessary adjustments.
 1296  *    * Leave the Ethernet header unchanged for DIX frames (by default).
1297  *    * IP Total Length field is updated to MSS or less(only for the last one).
1298  *    * IP Identification value is incremented by one for each packet.
1299  *    * TCP sequence Number is recalculated according to the payload length.
1300  *    * Set FIN and/or PSH flags for the *last* packet if applied.
1301  *    * TCP partial Checksum
1302  * 4. Update LSO information in the first message header.
1303  * 5. Release the original message header.
1304  */
1305 static mblk_t *
1306 nxge_do_softlso(mblk_t *mp, uint32_t mss)
1307 {
1308         uint32_t        hckflags;
1309         int             pktlen;
1310         int             hdrlen;
1311         int             segnum;
1312         int             i;
1313         struct ether_vlan_header *evh;
1314         int             ehlen, iphlen, tcphlen;
1315         struct ip       *oiph, *niph;
1316         struct tcphdr *otcph, *ntcph;
1317         int             available, len, left;
1318         uint16_t        ip_id;
1319         uint32_t        tcp_seq;
1320 #ifdef __sparc
1321         uint32_t        tcp_seq_tmp;
1322 #endif
1323         mblk_t          *datamp;
1324         uchar_t         *rptr;
1325         mblk_t          *nmp;
1326         mblk_t          *cmp;
1327         mblk_t          *mp_chain;
1328         boolean_t do_cleanup = B_FALSE;
1329         t_uscalar_t start_offset = 0;
1330         t_uscalar_t stuff_offset = 0;
1331         t_uscalar_t value = 0;
1332         uint16_t        l4_len;
1333         ipaddr_t        src, dst;
1334         uint32_t        cksum, sum, l4cksum;
1335 
1336         NXGE_DEBUG_MSG((NULL, TX_CTL,
1337             "==> nxge_do_softlso"));
1338         /*
1339          * check the length of LSO packet payload and calculate the number of
1340          * segments to be generated.
1341          */
1342         pktlen = msgsize(mp);
1343         evh = (struct ether_vlan_header *)mp->b_rptr;
1344 
1345         /* VLAN? */
1346         if (evh->ether_tpid == htons(ETHERTYPE_VLAN))
1347                 ehlen = sizeof (struct ether_vlan_header);
1348         else
1349                 ehlen = sizeof (struct ether_header);
1350         oiph = (struct ip *)(mp->b_rptr + ehlen);
1351         iphlen = oiph->ip_hl * 4;
1352         otcph = (struct tcphdr *)(mp->b_rptr + ehlen + iphlen);
1353         tcphlen = otcph->th_off * 4;
1354 
1355         l4_len = pktlen - ehlen - iphlen;
1356 
1357         NXGE_DEBUG_MSG((NULL, TX_CTL,
1358             "==> nxge_do_softlso: mss %d oiph $%p "
1359             "original ip_sum oiph->ip_sum 0x%x "
1360             "original tcp_sum otcph->th_sum 0x%x "
1361             "oiph->ip_len %d pktlen %d ehlen %d "
1362             "l4_len %d (0x%x) ip_len - iphlen %d ",
1363             mss,
1364             oiph,
1365             oiph->ip_sum,
1366             otcph->th_sum,
1367             ntohs(oiph->ip_len), pktlen,
1368             ehlen,
1369             l4_len,
1370             l4_len,
1371             ntohs(oiph->ip_len) - iphlen));
1372 
1373         /* IPv4 + TCP */
1374         if (!(oiph->ip_v == IPV4_VERSION)) {
1375                 NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
1376                     "<== nxge_do_softlso: not IPV4 "
1377                     "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
1378                     ntohs(oiph->ip_len), pktlen, ehlen,
1379                     tcphlen));
1380                 freemsg(mp);
1381                 return (NULL);
1382         }
1383 
1384         if (!(oiph->ip_p == IPPROTO_TCP)) {
1385                 NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
1386                     "<== nxge_do_softlso: not TCP "
1387                     "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
1388                     ntohs(oiph->ip_len), pktlen, ehlen,
1389                     tcphlen));
1390                 freemsg(mp);
1391                 return (NULL);
1392         }
1393 
1394         if (!(ntohs(oiph->ip_len) == pktlen - ehlen)) {
1395                 NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
1396                     "<== nxge_do_softlso: len not matched  "
1397                     "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
1398                     ntohs(oiph->ip_len), pktlen, ehlen,
1399                     tcphlen));
1400                 freemsg(mp);
1401                 return (NULL);
1402         }
1403 
1404         otcph = (struct tcphdr *)(mp->b_rptr + ehlen + iphlen);
1405         tcphlen = otcph->th_off * 4;
1406 
1407         /* TCP flags can not include URG, RST, or SYN */
1408         VERIFY((otcph->th_flags & (TH_SYN | TH_RST | TH_URG)) == 0);
1409 
1410         hdrlen = ehlen + iphlen + tcphlen;
1411 
1412         VERIFY(MBLKL(mp) >= hdrlen);
1413 
1414         if (MBLKL(mp) > hdrlen) {
1415                 datamp = mp;
1416                 rptr = mp->b_rptr + hdrlen;
1417         } else { /* = */
1418                 datamp = mp->b_cont;
1419                 rptr = datamp->b_rptr;
1420         }
1421 
1422         NXGE_DEBUG_MSG((NULL, TX_CTL,
1423             "nxge_do_softlso: otcph $%p pktlen: %d, "
1424             "hdrlen %d ehlen %d iphlen %d tcphlen %d "
1425             "mblkl(mp): %d, mblkl(datamp): %d",
1426             otcph,
1427             pktlen, hdrlen, ehlen, iphlen, tcphlen,
1428             (int)MBLKL(mp), (int)MBLKL(datamp)));
1429 
1430         hckflags = 0;
1431         nxge_hcksum_retrieve(mp,
1432             &start_offset, &stuff_offset, &value, NULL, &hckflags);
1433 
1434         dst = oiph->ip_dst.s_addr;
1435         src = oiph->ip_src.s_addr;
1436 
1437         cksum = (dst >> 16) + (dst & 0xFFFF) +
1438             (src >> 16) + (src & 0xFFFF);
1439         l4cksum = cksum + IP_TCP_CSUM_COMP;
1440 
1441         sum = l4_len + l4cksum;
1442         sum = (sum & 0xFFFF) + (sum >> 16);
1443 
1444         NXGE_DEBUG_MSG((NULL, TX_CTL,
1445             "==> nxge_do_softlso: dst 0x%x src 0x%x sum 0x%x ~new 0x%x "
1446             "hckflags 0x%x start_offset %d stuff_offset %d "
1447             "value (original) 0x%x th_sum 0x%x "
1448             "pktlen %d l4_len %d (0x%x) "
1449             "MBLKL(mp): %d, MBLKL(datamp): %d dump header %s",
1450             dst, src,
1451             (sum & 0xffff), (~sum & 0xffff),
1452             hckflags, start_offset, stuff_offset,
1453             value, otcph->th_sum,
1454             pktlen,
1455             l4_len,
1456             l4_len,
1457             ntohs(oiph->ip_len) - (int)MBLKL(mp),
1458             (int)MBLKL(datamp),
1459             nxge_dump_packet((char *)evh, 12)));
1460 
1461         /*
1462          * Start to process.
1463          */
1464         available = pktlen - hdrlen;
1465         segnum = (available - 1) / mss + 1;
1466 
1467         NXGE_DEBUG_MSG((NULL, TX_CTL,
1468             "==> nxge_do_softlso: pktlen %d "
1469             "MBLKL(mp): %d, MBLKL(datamp): %d "
1470             "available %d mss %d segnum %d",
1471             pktlen, (int)MBLKL(mp), (int)MBLKL(datamp),
1472             available,
1473             mss,
1474             segnum));
1475 
1476         VERIFY(segnum >= 2);
1477 
1478         /*
1479          * Try to pre-allocate all header messages
1480          */
1481         mp_chain = NULL;
1482         for (i = 0; i < segnum; i++) {
1483                 if ((nmp = allocb(hdrlen, 0)) == NULL) {
1484                         /* Clean up the mp_chain */
1485                         while (mp_chain != NULL) {
1486                                 nmp = mp_chain;
1487                                 mp_chain = mp_chain->b_next;
1488                                 freemsg(nmp);
1489                         }
1490                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1491                             "<== nxge_do_softlso: "
1492                             "Could not allocate enough messages for headers!"));
1493                         freemsg(mp);
1494                         return (NULL);
1495                 }
1496                 nmp->b_next = mp_chain;
1497                 mp_chain = nmp;
1498 
1499                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1500                     "==> nxge_do_softlso: "
1501                     "mp $%p nmp $%p mp_chain $%p mp_chain->b_next $%p",
1502                     mp, nmp, mp_chain, mp_chain->b_next));
1503         }
1504 
1505         NXGE_DEBUG_MSG((NULL, TX_CTL,
1506             "==> nxge_do_softlso: mp $%p nmp $%p mp_chain $%p",
1507             mp, nmp, mp_chain));
1508 
1509         /*
1510          * Associate payload with new packets
1511          */
1512         cmp = mp_chain;
1513         left = available;
1514         while (cmp != NULL) {
1515                 nmp = dupb(datamp);
1516                 if (nmp == NULL) {
1517                         do_cleanup = B_TRUE;
1518                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1519                             "==>nxge_do_softlso: "
1520                             "Can not dupb(datamp), have to do clean up"));
1521                         goto cleanup_allocated_msgs;
1522                 }
1523 
1524                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1525                     "==> nxge_do_softlso: (loop) before mp $%p cmp $%p "
1526                     "dupb nmp $%p len %d left %d msd %d ",
1527                     mp, cmp, nmp, len, left, mss));
1528 
1529                 cmp->b_cont = nmp;
1530                 nmp->b_rptr = rptr;
1531                 len = (left < mss) ? left : mss;
1532                 left -= len;
1533 
1534                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1535                     "==> nxge_do_softlso: (loop) after mp $%p cmp $%p "
1536                     "dupb nmp $%p len %d left %d mss %d ",
1537                     mp, cmp, nmp, len, left, mss));
1538                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1539                     "nxge_do_softlso: before available: %d, "
1540                     "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
1541                     available, left, len, segnum, (int)MBLKL(nmp)));
1542 
1543                 len -= MBLKL(nmp);
1544                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1545                     "nxge_do_softlso: after available: %d, "
1546                     "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
1547                     available, left, len, segnum, (int)MBLKL(nmp)));
1548 
1549                 while (len > 0) {
1550                         mblk_t *mmp = NULL;
1551 
1552                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1553                             "nxge_do_softlso: (4) len > 0 available: %d, "
1554                             "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
1555                             available, left, len, segnum, (int)MBLKL(nmp)));
1556 
1557                         if (datamp->b_cont != NULL) {
1558                                 datamp = datamp->b_cont;
1559                                 rptr = datamp->b_rptr;
1560                                 mmp = dupb(datamp);
1561                                 if (mmp == NULL) {
1562                                         do_cleanup = B_TRUE;
1563                                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1564                                             "==> nxge_do_softlso: "
1565                                             "Can not dupb(datamp) (1), :"
1566                                             "have to do clean up"));
1567                                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1568                                             "==> nxge_do_softlso: "
1569                                             "available: %d, left: %d, "
1570                                             "len: %d, MBLKL(nmp): %d",
1571                                             available, left, len,
1572                                             (int)MBLKL(nmp)));
1573                                         goto cleanup_allocated_msgs;
1574                                 }
1575                         } else {
1576                                 NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
1577                                     "==> nxge_do_softlso: "
1578                                     "(1)available: %d, left: %d, "
1579                                     "len: %d, MBLKL(nmp): %d",
1580                                     available, left, len,
1581                                     (int)MBLKL(nmp)));
1582                                 cmn_err(CE_PANIC,
1583                                     "==> nxge_do_softlso: "
1584                                     "Pointers must have been corrupted!\n"
1585                                     "datamp: $%p, nmp: $%p, rptr: $%p",
1586                                     (void *)datamp,
1587                                     (void *)nmp,
1588                                     (void *)rptr);
1589                         }
1590                         nmp->b_cont = mmp;
1591                         nmp = mmp;
1592                         len -= MBLKL(nmp);
1593                 }
1594                 if (len < 0) {
1595                         nmp->b_wptr += len;
1596                         rptr = nmp->b_wptr;
1597                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1598                             "(5) len < 0 (less than 0)"
1599                             "available: %d, left: %d, len: %d, MBLKL(nmp): %d",
1600                             available, left, len, (int)MBLKL(nmp)));
1601 
1602                 } else if (len == 0) {
1603                         if (datamp->b_cont != NULL) {
1604                                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1605                                     "(5) len == 0"
1606                                     "available: %d, left: %d, len: %d, "
1607                                     "MBLKL(nmp): %d",
1608                                     available, left, len, (int)MBLKL(nmp)));
1609                                 datamp = datamp->b_cont;
1610                                 rptr = datamp->b_rptr;
1611                         } else {
1612                                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1613                                     "(6)available b_cont == NULL : %d, "
1614                                     "left: %d, len: %d, MBLKL(nmp): %d",
1615                                     available, left, len, (int)MBLKL(nmp)));
1616 
1617                                 VERIFY(cmp->b_next == NULL);
1618                                 VERIFY(left == 0);
1619                                 break; /* Done! */
1620                         }
1621                 }
1622                 cmp = cmp->b_next;
1623 
1624                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1625                     "(7) do_softlso: "
1626                     "next mp in mp_chain available len != 0 : %d, "
1627                     "left: %d, len: %d, MBLKL(nmp): %d",
1628                     available, left, len, (int)MBLKL(nmp)));
1629         }
1630 
1631         /*
1632          * From now, start to fill up all headers for the first message
1633          * Hardware checksum flags need to be updated separately for FULLCKSUM
1634          * and PARTIALCKSUM cases. For full checksum, copy the original flags
1635          * into every new packet is enough. But for HCK_PARTIALCKSUM, all
1636          * required fields need to be updated properly.
1637          */
1638         nmp = mp_chain;
1639         bcopy(mp->b_rptr, nmp->b_rptr, hdrlen);
1640         nmp->b_wptr = nmp->b_rptr + hdrlen;
1641         niph = (struct ip *)(nmp->b_rptr + ehlen);
1642         niph->ip_len = htons(mss + iphlen + tcphlen);
1643         ip_id = ntohs(niph->ip_id);
1644         ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
1645 #ifdef __sparc
1646         bcopy((char *)&ntcph->th_seq, &tcp_seq_tmp, 4);
1647         tcp_seq = ntohl(tcp_seq_tmp);
1648 #else
1649         tcp_seq = ntohl(ntcph->th_seq);
1650 #endif
1651 
1652         ntcph->th_flags &= ~(TH_FIN | TH_PUSH | TH_RST);
1653 
1654         DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
1655         DB_CKSUMSTART(nmp) = start_offset;
1656         DB_CKSUMSTUFF(nmp) = stuff_offset;
1657 
1658         /* calculate IP checksum and TCP pseudo header checksum */
1659         niph->ip_sum = 0;
1660         niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
1661 
1662         l4_len = mss + tcphlen;
1663         sum = htons(l4_len) + l4cksum;
1664         sum = (sum & 0xFFFF) + (sum >> 16);
1665         ntcph->th_sum = (sum & 0xffff);
1666 
1667         NXGE_DEBUG_MSG((NULL, TX_CTL,
1668             "==> nxge_do_softlso: first mp $%p (mp_chain $%p) "
1669             "mss %d pktlen %d l4_len %d (0x%x) "
1670             "MBLKL(mp): %d, MBLKL(datamp): %d "
1671             "ip_sum 0x%x "
1672             "th_sum 0x%x sum 0x%x ) "
1673             "dump first ip->tcp %s",
1674             nmp, mp_chain,
1675             mss,
1676             pktlen,
1677             l4_len,
1678             l4_len,
1679             (int)MBLKL(mp), (int)MBLKL(datamp),
1680             niph->ip_sum,
1681             ntcph->th_sum,
1682             sum,
1683             nxge_dump_packet((char *)niph, 52)));
1684 
1685         cmp = nmp;
1686         while ((nmp = nmp->b_next)->b_next != NULL) {
1687                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1688                     "==>nxge_do_softlso: middle l4_len %d ", l4_len));
1689                 bcopy(cmp->b_rptr, nmp->b_rptr, hdrlen);
1690                 nmp->b_wptr = nmp->b_rptr + hdrlen;
1691                 niph = (struct ip *)(nmp->b_rptr + ehlen);
1692                 niph->ip_id = htons(++ip_id);
1693                 niph->ip_len = htons(mss + iphlen + tcphlen);
1694                 ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
1695                 tcp_seq += mss;
1696 
1697                 ntcph->th_flags &= ~(TH_FIN | TH_PUSH | TH_RST | TH_URG);
1698 
1699 #ifdef __sparc
1700                 tcp_seq_tmp = htonl(tcp_seq);
1701                 bcopy(&tcp_seq_tmp, (char *)&ntcph->th_seq, 4);
1702 #else
1703                 ntcph->th_seq = htonl(tcp_seq);
1704 #endif
1705                 DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
1706                 DB_CKSUMSTART(nmp) = start_offset;
1707                 DB_CKSUMSTUFF(nmp) = stuff_offset;
1708 
1709                 /* calculate IP checksum and TCP pseudo header checksum */
1710                 niph->ip_sum = 0;
1711                 niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
1712                 ntcph->th_sum = (sum & 0xffff);
1713 
1714                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1715                     "==> nxge_do_softlso: middle ip_sum 0x%x "
1716                     "th_sum 0x%x "
1717                     " mp $%p (mp_chain $%p) pktlen %d "
1718                     "MBLKL(mp): %d, MBLKL(datamp): %d ",
1719                     niph->ip_sum,
1720                     ntcph->th_sum,
1721                     nmp, mp_chain,
1722                     pktlen, (int)MBLKL(mp), (int)MBLKL(datamp)));
1723         }
1724 
1725         /* Last segment */
1726         /*
1727          * Set FIN and/or PSH flags if present only in the last packet.
1728          * The ip_len could be different from prior packets.
1729          */
1730         bcopy(cmp->b_rptr, nmp->b_rptr, hdrlen);
1731         nmp->b_wptr = nmp->b_rptr + hdrlen;
1732         niph = (struct ip *)(nmp->b_rptr + ehlen);
1733         niph->ip_id = htons(++ip_id);
1734         niph->ip_len = htons(msgsize(nmp->b_cont) + iphlen + tcphlen);
1735         ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
1736         tcp_seq += mss;
1737 #ifdef __sparc
1738         tcp_seq_tmp = htonl(tcp_seq);
1739         bcopy(&tcp_seq_tmp, (char *)&ntcph->th_seq, 4);
1740 #else
1741         ntcph->th_seq = htonl(tcp_seq);
1742 #endif
1743         ntcph->th_flags = (otcph->th_flags & ~TH_URG);
1744 
1745         DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
1746         DB_CKSUMSTART(nmp) = start_offset;
1747         DB_CKSUMSTUFF(nmp) = stuff_offset;
1748 
1749         /* calculate IP checksum and TCP pseudo header checksum */
1750         niph->ip_sum = 0;
1751         niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
1752 
1753         l4_len = ntohs(niph->ip_len) - iphlen;
1754         sum = htons(l4_len) + l4cksum;
1755         sum = (sum & 0xFFFF) + (sum >> 16);
1756         ntcph->th_sum = (sum & 0xffff);
1757 
1758         NXGE_DEBUG_MSG((NULL, TX_CTL,
1759             "==> nxge_do_softlso: last next "
1760             "niph->ip_sum 0x%x "
1761             "ntcph->th_sum 0x%x sum 0x%x "
1762             "dump last ip->tcp %s "
1763             "cmp $%p mp $%p (mp_chain $%p) pktlen %d (0x%x) "
1764             "l4_len %d (0x%x) "
1765             "MBLKL(mp): %d, MBLKL(datamp): %d ",
1766             niph->ip_sum,
1767             ntcph->th_sum, sum,
1768             nxge_dump_packet((char *)niph, 52),
1769             cmp, nmp, mp_chain,
1770             pktlen, pktlen,
1771             l4_len,
1772             l4_len,
1773             (int)MBLKL(mp), (int)MBLKL(datamp)));
1774 
1775 cleanup_allocated_msgs:
1776         if (do_cleanup) {
1777                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1778                     "==> nxge_do_softlso: "
1779                     "Failed allocating messages, "
1780                     "have to clean up and fail!"));
1781                 while (mp_chain != NULL) {
1782                         nmp = mp_chain;
1783                         mp_chain = mp_chain->b_next;
1784                         freemsg(nmp);
1785                 }
1786         }
1787         /*
1788          * We're done here, so just free the original message and return the
1789          * new message chain, that could be NULL if failed, back to the caller.
1790          */
1791         freemsg(mp);
1792 
1793         NXGE_DEBUG_MSG((NULL, TX_CTL,
1794             "<== nxge_do_softlso:mp_chain $%p", mp_chain));
1795         return (mp_chain);
1796 }
1797 
1798 /*
1799  * Will be called before NIC driver do further operation on the message.
1800  * The input message may include LSO information, if so, go to softlso logic
1801  * to eliminate the oversized LSO packet for the incapable underlying h/w.
1802  * The return could be the same non-LSO message or a message chain for LSO case.
1803  *
1804  * The driver needs to call this function per packet and process the whole chain
1805  * if applied.
1806  */
1807 static mblk_t *
1808 nxge_lso_eliminate(mblk_t *mp)
1809 {
1810         uint32_t lsoflags;
1811         uint32_t mss;
1812 
1813         NXGE_DEBUG_MSG((NULL, TX_CTL,
1814             "==>nxge_lso_eliminate:"));
1815         nxge_lso_info_get(mp, &mss, &lsoflags);
1816 
1817         if (lsoflags & HW_LSO) {
1818                 mblk_t *nmp;
1819 
1820                 NXGE_DEBUG_MSG((NULL, TX_CTL,
1821                     "==>nxge_lso_eliminate:"
1822                     "HW_LSO:mss %d mp $%p",
1823                     mss, mp));
1824                 if ((nmp = nxge_do_softlso(mp, mss)) != NULL) {
1825                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1826                             "<== nxge_lso_eliminate: "
1827                             "LSO: nmp not NULL nmp $%p mss %d mp $%p",
1828                             nmp, mss, mp));
1829                         return (nmp);
1830                 } else {
1831                         NXGE_DEBUG_MSG((NULL, TX_CTL,
1832                             "<== nxge_lso_eliminate_ "
1833                             "LSO: failed nmp NULL nmp $%p mss %d mp $%p",
1834                             nmp, mss, mp));
1835                         return (NULL);
1836                 }
1837         }
1838 
1839         NXGE_DEBUG_MSG((NULL, TX_CTL,
1840             "<== nxge_lso_eliminate"));
1841         return (mp);
1842 }
1843 
/*
 * nxge_csgen
 *
 *	Compute a 16-bit one's-complement checksum over a buffer.
 *
 *	The buffer is summed as 16-bit words in memory order, carries out of
 *	the low 16 bits are folded back in until none remain, and the result
 *	is complemented.  The callers in this file pass an IP header and its
 *	length and store the result directly in ip_sum.
 *
 * Arguments:
 *	adr	Start of the data to sum.
 *	len	Number of bytes to sum.  A length of zero or less is treated
 *		as an empty buffer.
 *
 * Returns:
 *	The complemented sum in the low 16 bits (0xffff for an empty buffer).
 */
static uint32_t
nxge_csgen(uint16_t *adr, int len)
{
	uint32_t	sum = 0;
	uint32_t	carry;
	int		nwords;
	int		i;

	/* Nothing to sum: the complement of zero. */
	if (len <= 0)
		return (0xffff);

	nwords = len / 2;
	for (i = 0; i < nwords; i++)
		sum += adr[i];

	if (len % 2) {
		/*
		 * Odd trailing byte: pad it with a zero byte to form the
		 * last word.  The byte is placed first in memory so the
		 * result is the same on big- and little-endian hosts, and
		 * only the final valid byte is read; loading adr[nwords]
		 * as a word would touch one byte beyond the buffer.
		 */
		uint16_t tail = 0;

		*(uint8_t *)&tail = ((uint8_t *)adr)[len - 1];
		sum += tail;
	}

	/* Fold the carries back into the low 16 bits until none remain. */
	while ((carry = (sum >> 16)) != 0)
		sum = (sum & 0xffff) + carry;

	return (~sum & 0xffff);
}
--- EOF ---