Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 | /* * Copyright (c) 2015-2023 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This module implements the flow switch for Skywalk * * --- FLOW SWITCH --- * * For each switch, a lock protects deletion of ports. When configuring * or deleting a new port, the lock is acquired in exclusive mode (after * holding SK_LOCK). When forwarding, the lock is acquired in shared * mode (without SK_LOCK). The lock is held throughout the entire * forwarding cycle, during which the thread may incur in a page fault. * Hence it is important that sleepable shared locks are used. * * On the rx ring, the per-port lock is grabbed initially to reserve * a number of slot in the ring, then the lock is released, packets are * copied from source to destination, and then the lock is acquired again * and the receive ring is updated. (A similar thing is done on the tx * ring for NIC and host stack ports attached to the switch) * * When a netif is attached to a flowswitch, two kernel channels are opened: * The device and host channels. The device channel provides the device * datapath. The host channel is not used in the datapath. It is there * only for providing some callbacks for activating the hostna (e.g. * intercepting host packets). */ #include <net/bpf.h> #include <netinet/tcp_seq.h> #include <skywalk/os_skywalk_private.h> #include <skywalk/nexus/flowswitch/nx_flowswitch.h> #include <skywalk/nexus/flowswitch/fsw_var.h> #include <skywalk/nexus/upipe/nx_user_pipe.h> #include <skywalk/nexus/netif/nx_netif.h> #include <skywalk/nexus/nexus_var.h> #include <sys/protosw.h> #include <sys/domain.h> SYSCTL_EXTENSIBLE_NODE(_kern_skywalk, OID_AUTO, flowswitch, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk FlowSwitch"); static void nx_fsw_dom_init(struct nxdom *); static void nx_fsw_dom_terminate(struct nxdom *); static void nx_fsw_dom_fini(struct nxdom *); static int nx_fsw_dom_find_port(struct kern_nexus *, boolean_t, nexus_port_t *); static int nx_fsw_dom_bind_port(struct kern_nexus *, nexus_port_t *, struct nxbind *, void *); static int nx_fsw_dom_unbind_port(struct kern_nexus *, nexus_port_t); static int nx_fsw_dom_connect(struct kern_nexus_domain_provider *, struct kern_nexus *, struct kern_channel *, struct chreq *, struct kern_channel *, struct nxbind *, struct proc *); static void nx_fsw_dom_disconnect(struct kern_nexus_domain_provider *, struct kern_nexus *, struct kern_channel *); static void nx_fsw_dom_defunct(struct kern_nexus_domain_provider *, struct kern_nexus *, struct kern_channel *, struct proc *); static void nx_fsw_dom_defunct_finalize(struct kern_nexus_domain_provider *, struct kern_nexus *, struct kern_channel *, boolean_t); static int nx_fsw_prov_init(struct kern_nexus_domain_provider *); static int nx_fsw_prov_params_adjust(const struct kern_nexus_domain_provider *, const struct nxprov_params *, struct nxprov_adjusted_params *); static int nx_fsw_prov_params(struct kern_nexus_domain_provider *, const uint32_t, const struct nxprov_params *, struct nxprov_params *, struct skmem_region_params[SKMEM_REGIONS], uint32_t); static int nx_fsw_prov_mem_new(struct kern_nexus_domain_provider *, struct kern_nexus *, struct nexus_adapter *); static int nx_fsw_prov_config(struct kern_nexus_domain_provider *, struct kern_nexus *, struct nx_cfg_req *, int, struct proc *, kauth_cred_t); static void nx_fsw_prov_fini(struct kern_nexus_domain_provider *); static int nx_fsw_prov_nx_ctor(struct kern_nexus *); static void nx_fsw_prov_nx_dtor(struct kern_nexus *); static size_t nx_fsw_prov_mib_get(struct kern_nexus *nx, struct nexus_mib_filter *, void *__sized_by(len)out, size_t len, struct proc *); struct nxdom nx_flowswitch_dom_s = { .nxdom_prov_head = STAILQ_HEAD_INITIALIZER(nx_flowswitch_dom_s.nxdom_prov_head), .nxdom_type = NEXUS_TYPE_FLOW_SWITCH, .nxdom_md_type = NEXUS_META_TYPE_PACKET, .nxdom_md_subtype = NEXUS_META_SUBTYPE_RAW, .nxdom_name = "flowswitch", .nxdom_ports = { .nb_def = NX_FSW_VP_MAX, .nb_min = NX_FSW_VP_MIN, .nb_max = NX_FSW_VP_MAX, }, .nxdom_tx_rings = { .nb_def = 1, .nb_min = 1, .nb_max = NX_FSW_MAXRINGS, }, .nxdom_rx_rings = { .nb_def = 1, .nb_min = 1, .nb_max = NX_FSW_MAXRINGS, }, .nxdom_tx_slots = { .nb_def = NX_FSW_TXRINGSIZE, .nb_min = NX_FSW_MINSLOTS, .nb_max = NX_FSW_MAXSLOTS, }, .nxdom_rx_slots = { .nb_def = NX_FSW_RXRINGSIZE, .nb_min = NX_FSW_MINSLOTS, .nb_max = NX_FSW_MAXSLOTS, }, .nxdom_buf_size = { .nb_def = NX_FSW_BUFSIZE, .nb_min = NX_FSW_MINBUFSIZE, .nb_max = NX_FSW_MAXBUFSIZE, }, .nxdom_large_buf_size = { .nb_def = NX_FSW_DEF_LARGE_BUFSIZE, .nb_min = NX_FSW_MIN_LARGE_BUFSIZE, .nb_max = NX_FSW_MAX_LARGE_BUFSIZE, }, .nxdom_meta_size = { .nb_def = NX_FSW_UMD_SIZE, .nb_min = NX_FSW_UMD_SIZE, .nb_max = NX_METADATA_USR_MAX_SZ, }, .nxdom_stats_size = { .nb_def = 0, .nb_min = 0, .nb_max = NX_STATS_MAX_SZ, }, .nxdom_pipes = { .nb_def = 0, .nb_min = 0, .nb_max = NX_UPIPE_MAXPIPES, }, .nxdom_flowadv_max = { .nb_def = 0, .nb_min = 0, .nb_max = NX_FLOWADV_MAX, }, .nxdom_nexusadv_size = { .nb_def = 0, .nb_min = 0, .nb_max = NX_NEXUSADV_MAX_SZ, }, .nxdom_capabilities = { .nb_def = NXPCAP_USER_CHANNEL, .nb_min = 0, .nb_max = (NXPCAP_CHECKSUM_PARTIAL | NXPCAP_USER_PACKET_POOL | NXPCAP_USER_CHANNEL), }, .nxdom_qmap = { .nb_def = NEXUS_QMAP_TYPE_INVALID, .nb_min = NEXUS_QMAP_TYPE_INVALID, .nb_max = NEXUS_QMAP_TYPE_INVALID, }, .nxdom_max_frags = { .nb_def = NX_PBUF_FRAGS_DEFAULT, .nb_min = NX_PBUF_FRAGS_MIN, .nb_max = NX_PBUF_FRAGS_MAX, }, .nxdom_init = nx_fsw_dom_init, .nxdom_terminate = nx_fsw_dom_terminate, .nxdom_fini = nx_fsw_dom_fini, .nxdom_connect = nx_fsw_dom_connect, .nxdom_find_port = nx_fsw_dom_find_port, .nxdom_port_is_reserved = nx_fsw_dom_port_is_reserved, .nxdom_bind_port = nx_fsw_dom_bind_port, .nxdom_unbind_port = nx_fsw_dom_unbind_port, .nxdom_disconnect = nx_fsw_dom_disconnect, .nxdom_defunct = nx_fsw_dom_defunct, .nxdom_defunct_finalize = nx_fsw_dom_defunct_finalize, }; struct kern_nexus_domain_provider nx_fsw_prov_s = { .nxdom_prov_name = NEXUS_PROVIDER_FLOW_SWITCH, .nxdom_prov_flags = NXDOMPROVF_DEFAULT, .nxdom_prov_cb = { .dp_cb_init = nx_fsw_prov_init, .dp_cb_fini = nx_fsw_prov_fini, .dp_cb_params = nx_fsw_prov_params, .dp_cb_mem_new = nx_fsw_prov_mem_new, .dp_cb_config = nx_fsw_prov_config, .dp_cb_nx_ctor = nx_fsw_prov_nx_ctor, .dp_cb_nx_dtor = nx_fsw_prov_nx_dtor, .dp_cb_nx_mem_info = NULL, /* not supported */ .dp_cb_nx_mib_get = nx_fsw_prov_mib_get, .dp_cb_nx_stop = NULL, }, }; static void nx_fsw_dom_init(struct nxdom *nxdom) { SK_LOCK_ASSERT_HELD(); ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED)); /* Generic initialization */ fsw_init(); fsw_dp_init(); (void) nxdom_prov_add(nxdom, &nx_fsw_prov_s); } static void nx_fsw_dom_terminate(struct nxdom *nxdom) { struct kern_nexus_domain_provider *nxdom_prov, *tnxdp; SK_LOCK_ASSERT_HELD(); STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head, nxdom_prov_link, tnxdp) { (void) nxdom_prov_del(nxdom_prov); } fsw_dp_uninit(); /* Generic uninitialization */ fsw_uninit(); } static void nx_fsw_dom_fini(struct nxdom *nxdom) { #pragma unused(nxdom) } static int nx_fsw_prov_init(struct kern_nexus_domain_provider *nxdom_prov) { #pragma unused(nxdom_prov) SK_D("initializing %s", nxdom_prov->nxdom_prov_name); return 0; } static int nx_fsw_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov, const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj) { #pragma unused(nxdom_prov, nxp) _CASSERT(NX_FSW_AFRINGSIZE <= NX_FSW_RXRINGSIZE); _CASSERT(NX_FSW_AFRINGSIZE <= NX_FSW_TXRINGSIZE); *(adj->adj_md_subtype) = NEXUS_META_SUBTYPE_PAYLOAD; *(adj->adj_stats_size) = sizeof(struct __nx_stats_fsw); VERIFY(sk_max_flows > 0 && sk_max_flows <= NX_FLOWADV_MAX); *(adj->adj_flowadv_max) = sk_max_flows; *(adj->adj_nexusadv_size) = sizeof(struct sk_nexusadv); *(adj->adj_caps) |= NXPCAP_USER_PACKET_POOL; if (sk_cksum_tx != 0) { *(adj->adj_caps) |= NXPCAP_CHECKSUM_PARTIAL; } *(adj->adj_alloc_rings) = *(adj->adj_free_rings) = ((nxp->nxp_max_frags > 1) && (sk_channel_buflet_alloc != 0)) ? 2 : 1; *(adj->adj_alloc_slots) = *(adj->adj_free_slots) = NX_FSW_AFRINGSIZE; if (!SKMEM_MEM_CONSTRAINED_DEVICE() && (*(adj->adj_buf_region_segment_size) < NX_FSW_BUF_SEG_SIZE)) { *(adj->adj_buf_region_segment_size) = NX_FSW_BUF_SEG_SIZE; } if (*(adj->adj_max_frags) > 1) { uint32_t fsw_maxbufs = SKMEM_MEM_CONSTRAINED_DEVICE() ? NX_FSW_MAXBUFFERS_MEM_CONSTRAINED : NX_FSW_MAXBUFFERS; uint32_t magazine_max_objs; *(adj->adj_max_buffers) = (sk_fsw_max_bufs != 0) ? sk_fsw_max_bufs : fsw_maxbufs; /* * Given that packet objects are the ones cached, use the * metadata size to determine the extra amount of objects * at magazine layer. */ magazine_max_objs = skmem_cache_magazine_max( NX_METADATA_PACKET_SZ(*(adj->adj_max_frags)) + METADATA_PREAMBLE_SZ); /* * Adjust the max buffers to account for the increase * associated with per-CPU caching. */ if (skmem_allow_magazines() && magazine_max_objs < *(adj->adj_max_buffers)) { *(adj->adj_max_buffers) -= magazine_max_objs; } } if (SKMEM_MEM_CONSTRAINED_DEVICE() || (fsw_use_dual_sized_pool == 0) || (*(adj->adj_max_frags) <= 1)) { *(adj->adj_large_buf_size) = 0; } return 0; } static int nx_fsw_prov_params(struct kern_nexus_domain_provider *nxdom_prov, const uint32_t req, const struct nxprov_params *nxp0, struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS], uint32_t pp_region_config_flags) { struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom; /* USD regions need to be writable to support user packet pool */ srp[SKMEM_REGION_TXAUSD].srp_cflags &= ~SKMEM_REGION_CR_UREADONLY; srp[SKMEM_REGION_RXFUSD].srp_cflags &= ~SKMEM_REGION_CR_UREADONLY; return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp, nxdom, nxdom, nxdom, pp_region_config_flags, nx_fsw_prov_params_adjust); } static void fsw_vp_region_params_setup(struct nexus_adapter *na, struct skmem_region_params *__counted_by(SKMEM_REGIONS)srp0, struct skmem_region_params *__counted_by(SKMEM_REGIONS)srp) { int i; uint32_t totalrings, nslots, afslots, evslots, lbaslots; /* copy default flowswitch parameters initialized in nxprov_params_adjust() */ for (i = 0; i < SKMEM_REGIONS; i++) { srp[i] = srp0[i]; } /* customize parameters that could vary across NAs */ totalrings = na_get_nrings(na, NR_TX) + na_get_nrings(na, NR_RX) + na_get_nrings(na, NR_A) + na_get_nrings(na, NR_F) + na_get_nrings(na, NR_EV) + na_get_nrings(na, NR_LBA); srp[SKMEM_REGION_SCHEMA].srp_r_obj_size = (uint32_t)CHANNEL_SCHEMA_SIZE(totalrings); srp[SKMEM_REGION_SCHEMA].srp_r_obj_cnt = totalrings; skmem_region_params_config(&srp[SKMEM_REGION_SCHEMA]); srp[SKMEM_REGION_RING].srp_r_obj_size = sizeof(struct __user_channel_ring); srp[SKMEM_REGION_RING].srp_r_obj_cnt = totalrings; skmem_region_params_config(&srp[SKMEM_REGION_RING]); nslots = na_get_nslots(na, NR_TX); afslots = na_get_nslots(na, NR_A); evslots = na_get_nslots(na, NR_EV); lbaslots = na_get_nslots(na, NR_LBA); srp[SKMEM_REGION_TXAKSD].srp_r_obj_size = MAX(MAX(MAX(nslots, afslots), evslots), lbaslots) * SLOT_DESC_SZ; srp[SKMEM_REGION_TXAKSD].srp_r_obj_cnt = na_get_nrings(na, NR_TX) + na_get_nrings(na, NR_A) + na_get_nrings(na, NR_EV) + na_get_nrings(na, NR_LBA); skmem_region_params_config(&srp[SKMEM_REGION_TXAKSD]); /* USD and KSD objects share the same size and count */ srp[SKMEM_REGION_TXAUSD].srp_r_obj_size = srp[SKMEM_REGION_TXAKSD].srp_r_obj_size; srp[SKMEM_REGION_TXAUSD].srp_r_obj_cnt = srp[SKMEM_REGION_TXAKSD].srp_r_obj_cnt; skmem_region_params_config(&srp[SKMEM_REGION_TXAUSD]); } static int nx_fsw_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct nexus_adapter *na) { #pragma unused(nxdom_prov) int err = 0; struct skmem_region_params *srp0 = NX_PROV(nx)->nxprov_region_params; struct skmem_region_params srp[SKMEM_REGIONS]; SK_DF(SK_VERB_FSW, "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx), NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name, SK_KVA(na)); ASSERT(na->na_type == NA_FLOWSWITCH_VP); ASSERT(na->na_arena == NULL); ASSERT((na->na_flags & NAF_USER_PKT_POOL) != 0); fsw_vp_region_params_setup(na, srp0, srp); /* * Each port in the flow switch is isolated from one another; * use NULL for the packet buffer pool references to indicate * this, since otherwise we'd be sharing the same pp for the * entire switch (maybe for a future, special use case?) * * This means that clients calling kern_nexus_get_pbufpool() * will get NULL, but this is fine based on current design * of providing port isolation, and also since we don't expose * the flow switch to external kernel clients. */ na->na_arena = skmem_arena_create_for_nexus(na, srp, NULL, NULL, FALSE, !NX_USER_CHANNEL_PROV(nx), &nx->nx_adv, &err); ASSERT(na->na_arena != NULL || err != 0); return err; } static int nx_fsw_prov_config(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct nx_cfg_req *ncr, int sopt_dir, struct proc *p, kauth_cred_t cred) { #pragma unused(nxdom_prov) struct sockopt sopt; int err = 0; SK_LOCK_ASSERT_HELD(); if (ncr->nc_req == USER_ADDR_NULL) { err = EINVAL; goto done; } /* to make life easier for handling copies */ bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = sopt_dir; sopt.sopt_val = ncr->nc_req; sopt.sopt_valsize = ncr->nc_req_len; sopt.sopt_p = p; /* avoid _MALLOCing at the cost of this ugly switch block */ switch (ncr->nc_cmd) { case NXCFG_CMD_ATTACH: case NXCFG_CMD_DETACH: { /* proceed only if the client possesses flow switch entitlement */ if (cred == NULL || (err = skywalk_priv_check_cred(p, cred, PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) != 0) { SK_ERR("missing nxctl credential"); err = EPERM; goto done; } struct nx_spec_req nsr; bzero(&nsr, sizeof(nsr)); err = sooptcopyin(&sopt, &nsr, sizeof(nsr), sizeof(nsr)); if (err != 0) { goto done; } /* * Null-terminate in case this has an interface name; * the union is already large enough for uuid_t. */ nsr.nsr_name[sizeof(nsr.nsr_name) - 1] = '\0'; if (p != kernproc) { nsr.nsr_flags &= NXSPECREQ_MASK; } err = fsw_ctl(nx, ncr->nc_cmd, p, &nsr); if (err != 0) { goto done; } err = sooptcopyout(&sopt, &nsr, sizeof(nsr)); break; } case NXCFG_CMD_FLOW_ADD: case NXCFG_CMD_FLOW_DEL: { /* need to have owner nxctl or kernnxctl */ if (cred == NULL) { SK_ERR("missing nxctl credential"); err = EPERM; goto done; } } /* fall through */ case NXCFG_CMD_FLOW_CONFIG: { /* checks flow PID ownership instead of nxctl creditial */ struct nx_flow_req nfr; bzero(&nfr, sizeof(nfr)); err = sooptcopyin(&sopt, &nfr, sizeof(nfr), sizeof(nfr)); if (err != 0) { goto done; } err = fsw_ctl(nx, ncr->nc_cmd, p, &nfr); if (err != 0) { goto done; } err = sooptcopyout(&sopt, &nfr, sizeof(nfr)); break; } case NXCFG_CMD_NETEM: { struct if_netem_params inp; bzero(&inp, sizeof(inp)); err = sooptcopyin(&sopt, &inp, sizeof(inp), sizeof(inp)); if (err != 0) { goto done; } err = fsw_ctl(nx, ncr->nc_cmd, p, &inp); if (err != 0) { goto done; } break; } default: err = EINVAL; goto done; } done: SK_DF(err ? SK_VERB_ERROR: SK_VERB_FSW, "nexus 0x%llx (%s) cmd %d (err %d)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, ncr->nc_cmd, err); return err; } static void nx_fsw_prov_fini(struct kern_nexus_domain_provider *nxdom_prov) { #pragma unused(nxdom_prov) SK_D("destroying %s", nxdom_prov->nxdom_prov_name); } static int nx_fsw_prov_nx_ctor(struct kern_nexus *nx) { struct nx_flowswitch *fsw; SK_LOCK_ASSERT_HELD(); ASSERT(nx->nx_arg == NULL); SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name); fsw = fsw_alloc(Z_WAITOK); nx->nx_arg = fsw; fsw->fsw_nx = nx; fsw->fsw_tx_rings = NX_PROV(nx)->nxprov_params->nxp_tx_rings; fsw->fsw_rx_rings = NX_PROV(nx)->nxprov_params->nxp_rx_rings; FSW_WLOCK(fsw); fsw_dp_ctor(fsw); FSW_WUNLOCK(fsw); SK_D("create new fsw 0x%llx for nexus 0x%llx", SK_KVA(NX_FSW_PRIVATE(nx)), SK_KVA(nx)); return 0; } static void nx_fsw_prov_nx_dtor(struct kern_nexus *nx) { struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); int err; SK_LOCK_ASSERT_HELD(); SK_D("nexus 0x%llx (%s) fsw 0x%llx", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(fsw)); err = fsw_ctl_detach(nx, current_proc(), NULL); ASSERT(err == 0); /* this cannot fail */ ASSERT(fsw->fsw_dev_ch == NULL); ASSERT(fsw->fsw_host_ch == NULL); SK_DF(SK_VERB_FSW, "marking fsw 0x%llx as free", SK_KVA(fsw)); fsw_free(fsw); nx->nx_arg = NULL; } static size_t nx_fsw_prov_mib_get(struct kern_nexus *nx, struct nexus_mib_filter *filter, void *__sized_by(len)out, size_t len, struct proc *p) { struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); size_t rlen; /* this check doesn't require holding fsw_lock */ if ((filter->nmf_bitmap & NXMIB_FILTER_NX_UUID) && (uuid_compare(filter->nmf_nx_uuid, fsw->fsw_nx->nx_uuid)) != 0) { return 0; } /* intercept NXMIB_FSW_STATS here since it's for flowswitch */ FSW_RLOCK(fsw); rlen = fsw_mib_get(fsw, filter, out, len, p); FSW_UNLOCK(fsw); return rlen; } boolean_t nx_fsw_dom_port_is_reserved(struct kern_nexus *nx, nexus_port_t nx_port) { #pragma unused(nx) return nx_port < NEXUS_PORT_FLOW_SWITCH_CLIENT; } static int nx_fsw_dom_find_port(struct kern_nexus *nx, boolean_t rsvd, nexus_port_t *nx_port) { struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); nexus_port_t first, last, port; int error; ASSERT(nx_port != NULL); port = *nx_port; ASSERT(port == NEXUS_PORT_ANY); if (rsvd) { first = 0; last = NEXUS_PORT_FLOW_SWITCH_CLIENT; } else { first = NEXUS_PORT_FLOW_SWITCH_CLIENT; ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX); last = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports); } ASSERT(first <= last); FSW_WLOCK(fsw); if (__improbable(first == last)) { error = ENOMEM; } else { error = nx_port_find(nx, first, last - 1, &port); ASSERT(error != 0 || (port >= first && port < last)); } FSW_WUNLOCK(fsw); SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW, "nx 0x%llx \"%s\" %snx_port %d [%u,%u] (err %d)", SK_KVA(nx), nx->nx_prov->nxprov_params->nxp_name, (rsvd ? "[reserved] " : ""), (int)port, first, (last - 1), error); if (error == 0) { *nx_port = port; } return error; } static int nx_fsw_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port, struct nxbind *nxb, void *info) { #pragma unused(info) struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); nexus_port_t first, last, port; int error; ASSERT(nx_port != NULL); ASSERT(nxb != NULL); port = *nx_port; /* can't bind reserved ports to client credentials */ if (nx_fsw_dom_port_is_reserved(nx, port)) { return EDOM; } /* * Allow clients to bind to regular ports (non-reserved); * reserved ports aren't subject to bind/unbind, since * they are used for internal purposes. */ first = NEXUS_PORT_FLOW_SWITCH_CLIENT; ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX); last = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports); ASSERT(first <= last); FSW_WLOCK(fsw); if (__improbable(first == last)) { error = ENOMEM; } else if (port != NEXUS_PORT_ANY) { error = nx_port_bind(nx, port, nxb); } else { error = nx_port_find(nx, first, last - 1, &port); ASSERT(error != 0 || (port >= first && port < last)); if (error == 0) { error = nx_port_bind(nx, port, nxb); } } FSW_WUNLOCK(fsw); SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW, "nx 0x%llx \"%s\" nx_port %d [%u,%u] (err %d)", SK_KVA(nx), nx->nx_prov->nxprov_params->nxp_name, (int)port, first, (last - 1), error); ASSERT(*nx_port == NEXUS_PORT_ANY || *nx_port == port); if (error == 0) { *nx_port = port; } return error; } static int nx_fsw_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port) { struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); int error; FSW_WLOCK(fsw); error = nx_port_unbind(nx, nx_port); FSW_WUNLOCK(fsw); SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW, "nx 0x%llx \"%s\" nx_port %d (err %d)", SK_KVA(nx), nx->nx_prov->nxprov_params->nxp_name, (int)nx_port, error); return error; } static int nx_fsw_dom_connect(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb, struct proc *p) { #pragma unused(nxdom_prov) nexus_port_t port = chr->cr_port; int err = 0; SK_LOCK_ASSERT_HELD(); ASSERT(nx->nx_prov->nxprov_params->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type && nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_FLOW_SWITCH); ASSERT(!(ch->ch_flags & CHANF_HOST)); ASSERT(!(ch->ch_flags & CHANF_KERNEL)); if (port != NEXUS_PORT_ANY && port >= NXDOM_MAX(NX_DOM(nx), ports)) { err = EDOM; goto done; } chr->cr_real_endpoint = chr->cr_endpoint = CH_ENDPOINT_FLOW_SWITCH; ASSERT(port != NEXUS_PORT_ANY); (void) snprintf(chr->cr_name, sizeof(chr->cr_name), "%s_%llu:%u", NX_FSW_NAME, nx->nx_id, port); chr->cr_ring_set = RING_SET_DEFAULT; err = na_connect(nx, ch, chr, ch0, nxb, p); done: return err; } static void nx_fsw_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct kern_channel *ch) { #pragma unused(nxdom_prov) SK_LOCK_ASSERT_HELD(); SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch), SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name, ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id); if (ch->ch_flags & CHANF_KERNEL) { na_disconnect_spec(nx, ch); } else { na_disconnect(nx, ch); } } static void nx_fsw_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct kern_channel *ch, struct proc *p) { #pragma unused(nxdom_prov) struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); ASSERT(!(ch->ch_flags & CHANF_KERNEL)); ASSERT(ch->ch_na->na_type == NA_FLOWSWITCH_VP); /* * Hold the flowswitch lock as writer; this prevents all data path * accesses to the flowswitch, and allows us to mark the rings with * CKRF_DEFUNCT. Unlike some other nexus types, the flowswitch * doesn't utilize kr_{enter,exit} for serialization, at present. */ FSW_WLOCK(fsw); na_ch_rings_defunct(ch, p); FSW_WUNLOCK(fsw); } static void nx_fsw_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov, struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked) { #pragma unused(nxdom_prov) struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx); int err = 0; if (!locked) { SK_LOCK_ASSERT_NOTHELD(); SK_LOCK(); LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED); } else { SK_LOCK_ASSERT_HELD(); LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); } ASSERT(!(ch->ch_flags & CHANF_KERNEL)); ASSERT(ch->ch_na->na_type == NA_FLOWSWITCH_VP); ASSERT(VPNA(ch->ch_na)->vpna_nx_port == ch->ch_info->cinfo_nx_port); err = fsw_port_na_defunct(fsw, VPNA(ch->ch_na)); if (err == 0) { na_defunct(nx, ch, ch->ch_na, locked); } SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) err %d", ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name, ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id, err); if (!locked) { LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED); SK_UNLOCK(); } else { LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); SK_LOCK_ASSERT_HELD(); } } #if SK_LOG /* Hoisted out of line to reduce kernel stack footprint */ SK_LOG_ATTRIBUTE static void nx_fsw_na_find_log(const struct chreq *chr, boolean_t create) { uuid_string_t uuidstr; SK_D("name \"%s\" spec_uuid \"%s\" nx_port %d mode 0x%b pipe_id %u " "ring_id %d ring_set %u ep_type %u:%u create %u%s", chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr), (int)chr->cr_port, chr->cr_mode, CHMODE_BITS, chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set, chr->cr_real_endpoint, chr->cr_endpoint, create, (strlcmp(chr->cr_name, NX_FSW_NAME, sizeof(NX_FSW_NAME)) != 0) ? " (skipped)" : ""); } #endif /* SK_LOG */ /* * Try to get a reference to a Nexus adapter attached to a flow switch. * If the adapter is found (or is created), this function returns 0, a * non NULL pointer is returned into *na, and the caller holds a * reference to the adapter. * If an adapter is not found, then no reference is grabbed and the * function returns an error code, or 0 if there is just a flow switch prefix * mismatch. Therefore the caller holds a reference when * (*na != NULL && return == 0). */ int nx_fsw_na_find(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr, struct nxbind *nxb, struct proc *p, struct nexus_adapter **na, boolean_t create) { struct nexus_vp_adapter *__single vpna = NULL; char *cr_name = chr->cr_name; struct nx_flowswitch *fsw; int error = 0; SK_LOCK_ASSERT_HELD(); *na = NULL; /* default return value */ #if SK_LOG if (__improbable(sk_verbose != 0)) { nx_fsw_na_find_log(chr, create); } #endif /* SK_LOG */ /* first try to see if this is a flow switch port. */ if (strlcmp(cr_name, NX_FSW_NAME, sizeof(NX_FSW_NAME) - 1) != 0) { return 0; /* no error, but no flow switch prefix */ } ASSERT(nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_FLOW_SWITCH); fsw = NX_FSW_PRIVATE(nx); ASSERT(fsw != NULL); if (!create) { return ENXIO; } /* * The flowswitch VP is only attachable from a user channel so none of * these flags should be set. */ ASSERT((chr->cr_mode & (CHMODE_KERNEL | CHMODE_CONFIG)) == 0); error = fsw_attach_vp(nx, ch, chr, nxb, p, &vpna); ASSERT(vpna == NULL || error == 0); if (error == 0) { /* use reference held by nx_fsw_attach_vp above */ *na = &vpna->vpna_up; SK_DF(SK_VERB_FSW, "vpna \"%s\" (0x%llx) refs %u to fsw \"%s\" nx_port %d", (*na)->na_name, SK_KVA(*na), (*na)->na_refcount, cr_name, (int)vpna->vpna_nx_port); } return error; } int nx_fsw_netagent_add(struct kern_nexus *nx) { return fsw_netagent_add_remove(nx, TRUE); } int nx_fsw_netagent_remove(struct kern_nexus *nx) { return fsw_netagent_add_remove(nx, FALSE); } void nx_fsw_netagent_update(struct kern_nexus *nx) { fsw_netagent_update(nx); } |