/*
 * include/haproxy/quic_conn.h
 *
 * Copyright 2020 HAProxy Technologies, Frederic Lecaille
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _HAPROXY_QUIC_CONN_H
#define _HAPROXY_QUIC_CONN_H
#ifdef USE_QUIC
#ifndef USE_OPENSSL
#error "Must define USE_OPENSSL"
#endif

/* NOTE(review): the operands of the 18 #include directives below were lost
 * in the copy of this file that was reviewed. They have been reconstructed
 * from the symbols used in this header; confirm against the repository
 * before merging.
 */
#include <inttypes.h>

#include <import/eb64tree.h>
#include <import/ebmbtree.h>

#include <haproxy/buf.h>
#include <haproxy/chunk.h>
#include <haproxy/ncbuf-t.h>
#include <haproxy/net_helper.h>
#include <haproxy/openssl-compat.h>
#include <haproxy/ticks.h>

#include <haproxy/listener.h>
#include <haproxy/proto_quic.h>
#include <haproxy/quic_cc.h>
#include <haproxy/quic_conn-t.h>
#include <haproxy/quic_enc.h>
#include <haproxy/quic_frame.h>
#include <haproxy/quic_loss.h>
#include <haproxy/mux_quic.h>

#include <openssl/rand.h>

extern struct pool_head *pool_head_quic_connection_id;

int ssl_quic_initial_ctx(struct bind_conf *bind_conf);

/* Return the long header packet type value to use for packet type <type>
 * with QUIC version <version>. For any version other than
 * QUIC_PROTOCOL_VERSION_2, <type> is returned unchanged. For version 2 the
 * four long packet types are remapped, and -1 is returned if <type> is not
 * one of them.
 */
static inline int quic_pkt_type(int type, uint32_t version)
{
	if (version != QUIC_PROTOCOL_VERSION_2)
		return type;

	switch (type) {
	case QUIC_PACKET_TYPE_INITIAL:
		return 1;
	case QUIC_PACKET_TYPE_0RTT:
		return 2;
	case QUIC_PACKET_TYPE_HANDSHAKE:
		return 3;
	case QUIC_PACKET_TYPE_RETRY:
		return 0;
	}

	return -1;
}

/* Return a non-zero value if QUIC_FL_CONN_LISTENER is set in <qc> flags,
 * 0 if not.
 */
static inline int qc_is_listener(struct quic_conn *qc)
{
	return qc->flags & QUIC_FL_CONN_LISTENER;
}

/* Copy <src> QUIC CID to <dst>.
 * This is the responsibility of the caller to check there is enough room in
 * <dst> to copy <src>.
 * Always succeeds.
 */
static inline void quic_cid_cpy(struct quic_cid *dst, const struct quic_cid *src)
{
	memcpy(dst->data, src->data, src->len);
	dst->len = src->len;
}

/* Copy <saddr> socket address data into <buf> buffer: the port first, then
 * the address, both as stored in <saddr>. An address whose family is not
 * AF_INET6 is handled as a struct sockaddr_in.
 * This is the responsibility of the caller to check the output buffer is big
 * enough to contain these socket address data.
 * Return the number of bytes copied.
 */
static inline size_t quic_saddr_cpy(unsigned char *buf,
                                    const struct sockaddr_storage *saddr)
{
	void *port, *addr;
	unsigned char *p;
	size_t port_len, addr_len;

	p = buf;
	if (saddr->ss_family == AF_INET6) {
		port = &((struct sockaddr_in6 *)saddr)->sin6_port;
		addr = &((struct sockaddr_in6 *)saddr)->sin6_addr;
		port_len = sizeof ((struct sockaddr_in6 *)saddr)->sin6_port;
		addr_len = sizeof ((struct sockaddr_in6 *)saddr)->sin6_addr;
	}
	else {
		port = &((struct sockaddr_in *)saddr)->sin_port;
		addr = &((struct sockaddr_in *)saddr)->sin_addr;
		port_len = sizeof ((struct sockaddr_in *)saddr)->sin_port;
		addr_len = sizeof ((struct sockaddr_in *)saddr)->sin_addr;
	}
	memcpy(p, port, port_len);
	p += port_len;
	memcpy(p, addr, addr_len);
	p += addr_len;

	return p - buf;
}

/* Append to <buf> a dump of <cid> QUIC connection ID: its length, followed
 * by its value in hexadecimal if present (non null length). Used only for
 * debugging purposes.
 * Always succeeds.
 */
static inline void quic_cid_dump(struct buffer *buf,
                                 const struct quic_cid *cid)
{
	int i;

	chunk_appendf(buf, "(%d", cid->len);
	if (cid->len)
		chunk_appendf(buf, ",");
	for (i = 0; i < cid->len; i++)
		chunk_appendf(buf, "%02x", cid->data[i]);
	chunk_appendf(buf, ")");
}

/* Return tree index where <cid> is stored: this is the first byte of <cid>. */
static inline uchar _quic_cid_tree_idx(const unsigned char *cid)
{
	return cid[0];
}

/* Return tree index where <cid> is stored. */
static inline uchar quic_cid_tree_idx(const struct quic_cid *cid)
{
	return _quic_cid_tree_idx(cid->data);
}

/* Insert <conn_id> into global CID tree as a thread-safe operation: the
 * insertion is done with the write lock of the selected tree held.
 */
static inline void quic_cid_insert(struct quic_connection_id *conn_id)
{
	const uchar idx = quic_cid_tree_idx(&conn_id->cid);
	struct quic_cid_tree *tree = &quic_cid_trees[idx];

	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
	ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len);
	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
}

/* Remove <conn_id> from global CID tree as a thread-safe operation: the
 * removal is done with the write lock of the selected tree held.
 */
static inline void quic_cid_delete(struct quic_connection_id *conn_id)
{
	const uchar idx = quic_cid_tree_idx(&conn_id->cid);
	struct quic_cid_tree __maybe_unused *tree = &quic_cid_trees[idx];

	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
	ebmb_delete(&conn_id->node);
	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
}

/* Free the CIDs attached to <conn> QUIC connection: each of them is removed
 * from the global CID tree and from the connection tree, then released to
 * its pool.
 */
static inline void free_quic_conn_cids(struct quic_conn *conn)
{
	struct eb64_node *node;

	node = eb64_first(&conn->cids);
	while (node) {
		struct quic_connection_id *conn_id;

		conn_id = eb64_entry(node, struct quic_connection_id, seq_num);

		/* remove the CID from the receiver tree */
		quic_cid_delete(conn_id);

		/* remove the CID from the quic_conn tree. The next node is
		 * looked up first because <conn_id> is released just after.
		 */
		node = eb64_next(node);
		eb64_delete(&conn_id->seq_num);
		pool_free(pool_head_quic_connection_id, conn_id);
	}
}

/* Copy <src> new connection ID information to <dst> NEW_CONNECTION_ID frame.
 * Note that the CID data and the stateless reset token are not duplicated:
 * <dst> only stores pointers to <src> storage for them.
 * Always succeeds.
 */
static inline void quic_connection_id_to_frm_cpy(struct quic_frame *dst,
                                                 struct quic_connection_id *src)
{
	struct qf_new_connection_id *ncid_frm = &dst->new_connection_id;

	ncid_frm->seq_num = src->seq_num.key;
	ncid_frm->retire_prior_to = src->retire_prior_to;
	ncid_frm->cid.len = src->cid.len;
	ncid_frm->cid.data = src->cid.data;
	ncid_frm->stateless_reset_token = src->stateless_reset_token;
}

/* Return a 32-bits integer in <val> from QUIC packet with <buf> as address.
 * Makes <buf> point to the data after this 32-bits value if succeeded.
 * Note that these 32-bits integers are network bytes ordered.
 * Returns 0 if failed (not enough data in the buffer), 1 if succeeded.
 */
static inline int quic_read_uint32(uint32_t *val,
                                   const unsigned char **buf,
                                   const unsigned char *end)
{
	uint32_t net;

	/* <end> - <*buf> is signed: reject a negative value explicitly so
	 * that it is not converted to a huge unsigned one by the comparison.
	 */
	if (end < *buf || (size_t)(end - *buf) < sizeof net)
		return 0;

	/* <*buf> may not be suitably aligned for a direct 32-bits load. */
	memcpy(&net, *buf, sizeof net);
	*val = ntohl(net);
	*buf += sizeof net;

	return 1;
}

/* Write a 32-bits integer to a buffer with <buf> as address.
 * Make <buf> point to the data after this 32-bits value if succeeded.
 * Note that these 32-bits integers are network bytes ordered.
 * Returns 0 if failed (not enough room in the buffer), 1 if succeeded.
 */
static inline int quic_write_uint32(unsigned char **buf,
                                    const unsigned char *end, uint32_t val)
{
	uint32_t net = htonl(val);

	/* Same signed/unsigned precaution as in quic_read_uint32(). */
	if (end < *buf || (size_t)(end - *buf) < sizeof net)
		return 0;

	/* <*buf> may not be suitably aligned for a direct 32-bits store. */
	memcpy(*buf, &net, sizeof net);
	*buf += sizeof net;

	return 1;
}

/* Return the maximum number of bytes we must use to completely fill a
 * buffer with <sz> as size for a data field of bytes prefixed by its QUIC
 * variable-length (may be 0).
 * Also put in <*len_sz> the size of this QUIC variable-length.
 * So after returning from this function we have : <*len_sz> + <ret> <= <sz>
 * (<*len_sz> = { max(i), i + ret <= <sz> }) .
 * Note that <*len_sz> is left untouched when 0 is returned because <sz> is
 * too small.
 */
static inline size_t max_available_room(size_t sz, size_t *len_sz)
{
	size_t sz_sz, ret;
	size_t diff;

	sz_sz = quic_int_getsize(sz);
	if (sz <= sz_sz)
		return 0;

	ret = sz - sz_sz;
	*len_sz = quic_int_getsize(ret);
	/* Difference between the two sizes. Note that <sz_sz> >= <*len_sz>. */
	diff = sz_sz - *len_sz;
	if (unlikely(diff > 0)) {
		/* Let's try to take into an account remaining bytes: <ret>
		 * was computed with a length field of <sz_sz> bytes but only
		 * <*len_sz> bytes are needed to encode it. Use the biggest
		 * value which can be encoded with <*len_sz> bytes if it fits
		 * in <sz> with its length field, <sz> - <diff> if not.
		 */
		size_t max_int = quic_max_int(*len_sz);

		if (max_int + *len_sz <= sz)
			ret = max_int;
		else
			ret = sz - diff;
	}

	return ret;
}

/* This function computes the maximum data we can put into a buffer with <sz> as
 * size prefixed with a variable-length field "Length" whose value is the
 * remaining data length, already filled of <ilen> bytes which must be taken
 * into an account by "Length" field, and finally followed by the data we want
 * to put in this buffer prefixed again by a variable-length field.
 * <sz> is the size of the buffer to fill.
 * <ilen> the number of bytes already put after the "Length" field.
 * <dlen> the number of bytes we want to at most put in the buffer.
 * This is typically this function which must be used to fill as much as
 * possible a QUIC packet made of only one CRYPTO or STREAM frames.
 * Returns this computed size if there is enough room in the buffer, 0 if not.
 */
static inline size_t max_stream_data_size(size_t sz, size_t ilen, size_t dlen)
{
	size_t ret, len_sz, dlen_sz;

	/*
	 * The length of variable-length QUIC integers are powers of two.
	 * Look for the first "Length" field size <len_sz> which matches our
	 * need. As we must put <ilen> bytes in our buffer, the minimum value
	 * for <len_sz> is the number of bytes required to encode <ilen>.
	 */
	for (len_sz = quic_int_getsize(ilen);
	     len_sz <= QUIC_VARINT_MAX_SIZE;
	     len_sz <<= 1) {
		if (sz < len_sz + ilen)
			return 0;

		ret = max_available_room(sz - len_sz - ilen, &dlen_sz);
		if (!ret)
			return 0;

		/* Check that the total can be encoded with <len_sz> bytes */
		if (len_sz + ilen + dlen_sz + ret <= quic_max_int(len_sz))
			return ret < dlen ? ret : dlen;
	}

	return 0;
}

/* Return the length in bytes of <pn> packet number depending on
 * <largest_acked_pn> the largest acknowledged packet number.
 */
static inline size_t quic_packet_number_length(int64_t pn,
                                               int64_t largest_acked_pn)
{
	int64_t max_nack_pkts;

	/* About packet number encoding, the RFC says:
	 * The sender MUST use a packet number size able to represent more than
	 * twice as large a range than the difference between the largest
	 * acknowledged packet and packet number being sent.
	 */
	max_nack_pkts = 2 * (pn - largest_acked_pn) + 1;
	if (max_nack_pkts > 0xffffff)
		return 4;
	if (max_nack_pkts > 0xffff)
		return 3;
	if (max_nack_pkts > 0xff)
		return 2;

	return 1;
}

/* Encode <pn> packet number with <pn_len> as length in byte into a buffer with
 * <buf> as current copy address and <end> as pointer to one past the end of
 * this buffer. The <pn_len> least significant bytes of <pn> are written in
 * network byte order and <*buf> is advanced by <pn_len>.
 * Only 1 to 4 are handled as <pn_len> values: for any other value nothing is
 * written, but <*buf> is still advanced by <pn_len>.
 * Returns 0 if there is not enough room in the buffer, 1 if not.
 */
static inline int quic_packet_number_encode(unsigned char **buf,
                                            const unsigned char *end,
                                            uint64_t pn, size_t pn_len)
{
	/* <end> - <*buf> is signed: reject a negative value explicitly so
	 * that it is not converted to a huge unsigned one by the comparison.
	 */
	if (end < *buf || (size_t)(end - *buf) < pn_len)
		return 0;

	/* Encode the packet number. */
	switch (pn_len) {
	case 1:
		**buf = pn;
		break;
	case 2:
		write_n16(*buf, pn);
		break;
	case 3:
		(*buf)[0] = pn >> 16;
		(*buf)[1] = pn >> 8;
		(*buf)[2] = pn;
		break;
	case 4:
		write_n32(*buf, pn);
		break;
	}
	*buf += pn_len;

	return 1;
}

/* Returns the <ack_delay> field value in milliseconds from <ack_frm> ACK frame for
 * <conn> QUIC connection. Note that the value of <ack_delay> coming from
 * ACK frame is in microseconds, scaled by the ack_delay_exponent TX parameter.
 */
static inline unsigned int quic_ack_delay_ms(struct qf_ack *ack_frm,
                                             struct quic_conn *conn)
{
	return (ack_frm->ack_delay << conn->tx.params.ack_delay_exponent) / 1000;
}

/* Returns the <ack_delay> field value in microsecond to be set in an ACK frame
 * depending on the time the packet with a new largest packet number was received.
 * <time_received> is compared to <now_ms>, so both are in milliseconds.
 */
static inline uint64_t quic_compute_ack_delay_us(unsigned int time_received,
                                                 struct quic_conn *conn)
{
	/* Widen before multiplying so that the conversion to microseconds
	 * cannot wrap on 32 bits.
	 */
	return ((uint64_t)(now_ms - time_received) * 1000) >> conn->tx.params.ack_delay_exponent;
}

/* Initialize <pktns> QUIC packet number space.
 * Never fails.
 */
static inline void quic_pktns_init(struct quic_pktns *pktns)
{
	LIST_INIT(&pktns->tx.frms);
	pktns->tx.next_pn = -1;
	pktns->tx.pkts = EB_ROOT_UNIQUE;
	pktns->tx.time_of_last_eliciting = 0;
	pktns->tx.loss_time = TICK_ETERNITY;
	pktns->tx.in_flight = 0;
	pktns->tx.ack_delay = 0;

	pktns->rx.largest_pn = -1;
	pktns->rx.largest_acked_pn = -1;
	pktns->rx.arngs.root = EB_ROOT_UNIQUE;
	pktns->rx.arngs.sz = 0;
	pktns->rx.arngs.enc_sz = 0;
	pktns->rx.nb_aepkts_since_last_ack = 0;
	pktns->rx.largest_time_received = 0;

	pktns->flags = 0;
}

/* Returns the current largest acknowledged packet number if exists, -1 if not.
 * This is the <last> value of the last node of <pktns> RX ack ranges tree.
 */
static inline int64_t quic_pktns_get_largest_acked_pn(struct quic_pktns *pktns)
{
	struct eb64_node *ar = eb64_last(&pktns->rx.arngs.root);

	if (!ar)
		return -1;

	return eb64_entry(ar, struct quic_arng_node, first)->last;
}

/* The TX packets sent in the same datagram are linked to each others in
 * the order they are built. This function detach a packet from its successor
 * and predecessor in the same datagram. Note that the <prev> and <next>
 * members of <pkt> itself are not modified.
 */
static inline void quic_tx_packet_dgram_detach(struct quic_tx_packet *pkt)
{
	if (pkt->prev)
		pkt->prev->next = pkt->next;
	if (pkt->next)
		pkt->next->prev = pkt->prev;
}

/* Increment the reference counter of <pkt> */
static inline void quic_tx_packet_refinc(struct quic_tx_packet *pkt)
{
	pkt->refcnt++;
}

/* Decrement the reference counter of <pkt>. When it reaches 0, <pkt> is
 * detached from its datagram and released to its pool. Its list of frames
 * must be empty at this time.
 */
static inline void quic_tx_packet_refdec(struct quic_tx_packet *pkt)
{
	if (--pkt->refcnt == 0) {
		BUG_ON(!LIST_ISEMPTY(&pkt->frms));
		/* If there are others packet in the same datagram <pkt> is attached to,
		 * detach the previous one and the next one from <pkt>.
		 */
		quic_tx_packet_dgram_detach(pkt);
		pool_free(pool_head_quic_tx_packet, pkt);
	}
}

/* Release all the TX packets attached to <pktns> packet number space of <qc>
 * QUIC connection. The number of in flight ack-eliciting packets of the
 * connection path is decremented for each ack-eliciting packet. All the
 * frames attached to these packets are unreferenced and freed, and the
 * reference they hold on their packet is dropped.
 */
static inline void quic_pktns_tx_pkts_release(struct quic_pktns *pktns, struct quic_conn *qc)
{
	struct eb64_node *node;

	node = eb64_first(&pktns->tx.pkts);
	while (node) {
		struct quic_tx_packet *pkt;
		struct quic_frame *frm, *frmbak;

		pkt = eb64_entry(node, struct quic_tx_packet, pn_node);
		/* <pkt> may be freed below: look up the next node first. */
		node = eb64_next(node);
		if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
			qc->path->ifae_pkts--;
		list_for_each_entry_safe(frm, frmbak, &pkt->frms, list) {
			qc_frm_unref(frm, qc);
			LIST_DEL_INIT(&frm->list);
			quic_tx_packet_refdec(frm->pkt);
			qc_frm_free(&frm);
		}
		eb64_delete(&pkt->pn_node);
		quic_tx_packet_refdec(pkt);
	}
}

/* Discard <pktns> packet number space attached to <qc> QUIC connection.
 * Its loss information are reset. Deduce the outstanding bytes for this
 * packet number space from the outstanding bytes for the path of this
 * connection.
 * Note that all the non acknowledged TX packets and their frames are freed.
 * Always succeeds.
 */
static inline void quic_pktns_discard(struct quic_pktns *pktns,
                                      struct quic_conn *qc)
{
	qc->path->in_flight -= pktns->tx.in_flight;
	qc->path->prep_in_flight -= pktns->tx.in_flight;
	qc->path->loss.pto_count = 0;

	pktns->tx.time_of_last_eliciting = 0;
	pktns->tx.loss_time = TICK_ETERNITY;
	pktns->tx.pto_probe = 0;
	pktns->tx.in_flight = 0;
	quic_pktns_tx_pkts_release(pktns, qc);
}

/* Initialize <path> QUIC network path depending on <ipv4> boolean
 * which is true for an IPv4 path, if not false for an IPv6 path.
 * The MTU is set to the initial value for the address family. The congestion
 * window is set to the smallest value between 10 times this MTU and the
 * biggest value between twice this MTU and 14720 bytes, the minimal
 * congestion window to twice this MTU. The congestion controller of the path
 * is initialized with <algo> and <qc>.
 */
static inline void quic_path_init(struct quic_path *path, int ipv4,
                                  struct quic_cc_algo *algo, struct quic_conn *qc)
{
	unsigned int max_dgram_sz;

	max_dgram_sz = ipv4 ? QUIC_INITIAL_IPV4_MTU : QUIC_INITIAL_IPV6_MTU;
	quic_loss_init(&path->loss);
	path->mtu = max_dgram_sz;
	path->cwnd = QUIC_MIN(10 * max_dgram_sz, QUIC_MAX(max_dgram_sz << 1, 14720U));
	path->mcwnd = path->cwnd;
	path->min_cwnd = max_dgram_sz << 1;
	path->prep_in_flight = 0;
	path->in_flight = 0;
	path->ifae_pkts = 0;
	quic_cc_init(&path->cc, algo, qc);
}

/* Return the remaining <room> available on <path> QUIC path. In fact this is
 * the remaining number of bytes available in the congestion controller window,
 * 0 if the bytes in flight already exceed this window.
 */
static inline size_t quic_path_room(struct quic_path *path)
{
	if (path->in_flight > path->cwnd)
		return 0;

	return path->cwnd - path->in_flight;
}

/* Return the remaining <room> available on <path> QUIC path for prepared data
 * (before being sent). Almost the same that for the QUIC path room, except that
 * here this is the data which have been prepared which are taken into an account.
 */
static inline size_t quic_path_prep_data(struct quic_path *path)
{
	if (path->prep_in_flight > path->cwnd)
		return 0;

	return path->cwnd - path->prep_in_flight;
}

/* Return 1 if <pktns> matches with the Application packet number space of
 * <conn> connection which is common to the 0-RTT and 1-RTT encryption levels, 0
 * if not (handshake packets).
 */
static inline int quic_application_pktns(struct quic_pktns *pktns, struct quic_conn *conn)
{
	return pktns == &conn->pktns[QUIC_TLS_PKTNS_01RTT];
}

/* CRYPTO data buffer handling functions. */

/* Return the address of the byte at <offset> in the TX CRYPTO data of <qel>
 * encryption level. The upper bits of <offset> select the buffer, the lower
 * ones the position in this buffer. No bound check is done on <offset>.
 */
static inline unsigned char *c_buf_getpos(struct quic_enc_level *qel, uint64_t offset)
{
	int idx;
	unsigned char *data;

	idx = offset >> QUIC_CRYPTO_BUF_SHIFT;
	data = qel->tx.crypto.bufs[idx]->data;
	return data + (offset & QUIC_CRYPTO_BUF_MASK);
}

/* Returns 1 if the CRYPTO buffer at <qel> encryption level has been
 * consumed (sent to the peer), 0 if not.
 */
static inline int c_buf_consumed(struct quic_enc_level *qel)
{
	return qel->tx.crypto.offset == qel->tx.crypto.sz;
}

/* Return 1 if <pkt> header form is long, 0 if not. */
static inline int qc_pkt_long(const struct quic_rx_packet *pkt)
{
	return pkt->type != QUIC_PACKET_TYPE_SHORT;
}

/* Return 1 if there is RX packets for <qel> QUIC encryption level, 0 if not */
static inline int qc_el_rx_pkts(struct quic_enc_level *qel)
{
	int ret;

	ret = !eb_is_empty(&qel->rx.pkts);

	return ret;
}

/* Increment the reference counter of <pkt> */
static inline void quic_rx_packet_refinc(struct quic_rx_packet *pkt)
{
	pkt->refcnt++;
}

/* Decrement the reference counter of <pkt> while remaining positive.
 * Note that <pkt> is not released by this function.
 */
static inline void quic_rx_packet_refdec(struct quic_rx_packet *pkt)
{
	if (pkt->refcnt)
		pkt->refcnt--;
}

/* Delete all RX packets for <qel> QUIC encryption level: each packet is
 * removed from the tree and its reference counter is decremented.
 */
static inline void qc_el_rx_pkts_del(struct quic_enc_level *qel)
{
	struct eb64_node *node;

	node = eb64_first(&qel->rx.pkts);
	while (node) {
		struct quic_rx_packet *pkt =
			eb64_entry(node, struct quic_rx_packet, pn_node);

		node = eb64_next(node);
		eb64_delete(&pkt->pn_node);
		quic_rx_packet_refdec(pkt);
	}
}

/* Print on stderr the address, type and packet number of every RX packet
 * still attached to <qel> QUIC encryption level.
 */
static inline void qc_list_qel_rx_pkts(struct quic_enc_level *qel)
{
	struct eb64_node *node;

	node = eb64_first(&qel->rx.pkts);
	while (node) {
		struct quic_rx_packet *pkt;

		pkt = eb64_entry(node, struct quic_rx_packet, pn_node);
		/* "%p" expects a pointer to void */
		fprintf(stderr, "pkt@%p type=%d pn=%llu\n",
		        (void *)pkt, pkt->type, (ull)pkt->pn_node.key);
		node = eb64_next(node);
	}
}

/* Print on stderr the RX packets still attached to the Initial, Early-Data,
 * Handshake and Application encryption levels of <qc> QUIC connection.
 */
static inline void qc_list_all_rx_pkts(struct quic_conn *qc)
{
	fprintf(stderr, "REMAINING QEL RX PKTS:\n");
	qc_list_qel_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_INITIAL]);
	qc_list_qel_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_EARLY_DATA]);
	qc_list_qel_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE]);
	qc_list_qel_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_APP]);
}

void chunk_frm_appendf(struct buffer *buf, const struct quic_frame *frm);

void quic_set_connection_close(struct quic_conn *qc, const struct quic_err err);
void quic_set_tls_alert(struct quic_conn *qc, int alert);
int quic_set_app_ops(struct quic_conn *qc, const unsigned char *alpn, size_t alpn_len);
int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len);
int quic_get_dgram_dcid(unsigned char *buf, const unsigned char *end,
                        unsigned char **dcid, size_t *dcid_len);
struct quic_cid quic_derive_cid(const struct quic_cid *orig,
                                const struct sockaddr_storage *addr);
int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
                     const struct sockaddr_storage *cli_addr,
                     unsigned char *buf, size_t buf_len);
int qc_send_mux(struct quic_conn *qc, struct list *frms);

void qc_notify_err(struct quic_conn *qc);
int qc_notify_send(struct quic_conn *qc);

void qc_release_frm(struct quic_conn *qc, struct quic_frame *frm);

void qc_check_close_on_released_mux(struct quic_conn *qc);

void quic_conn_release(struct quic_conn *qc);

void qc_kill_conn(struct quic_conn *qc);

int qc_parse_hd_form(struct quic_rx_packet *pkt,
                     unsigned char **buf, const unsigned char *end);
int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *qc,
                     struct listener *li);

/* Wake up every QUIC connections on closing/draining state if process stopping
 * is active. They will be immediately released so this ensures haproxy process
 * stopping is not delayed by them.
 * Only the connections attached to the current thread context are concerned.
 */
static inline void quic_handle_stopping(void)
{
	struct quic_conn *qc;

	if (stopping) {
		list_for_each_entry(qc, &th_ctx->quic_conns_clo, el_th_ctx)
			task_wakeup(qc->idle_timer_task, TASK_WOKEN_OTHER);
	}
}

int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new_li);
void qc_finalize_affinity_rebind(struct quic_conn *qc);

#endif /* USE_QUIC */
#endif /* _HAPROXY_QUIC_CONN_H */