From 46e12947d28fed171003d407654259f783c84b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Fri, 26 Jul 2024 19:50:46 +0800 Subject: [PATCH 1/8] jets: jet +rip-octs, +cat-octs in bytestream --- pkg/noun/jets/e/bytestream.c | 146 +++++++++++++++++++++++++++++++++++ pkg/noun/jets/tree.c | 10 +++ pkg/noun/jets/w.h | 3 + 3 files changed, 159 insertions(+) create mode 100644 pkg/noun/jets/e/bytestream.c diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c new file mode 100644 index 0000000000..0b8855a609 --- /dev/null +++ b/pkg/noun/jets/e/bytestream.c @@ -0,0 +1,146 @@ +#include +#include +#include +#include + +static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { + + if (c3n == u3r_mean(octs, + 2, p_octs, + 3, q_octs, 0)){ + u3m_bail(c3__exit); + } + + if (c3n == u3a_is_atom(*p_octs) || + c3n == u3a_is_atom(*q_octs)) { + u3m_bail(c3__exit); + } +} +static void _x_octs_buffer(u3_atom *q_octs, c3_w p_octs_w, c3_y** buf_y, + c3_w* len_w, c3_w* lead_w) +{ + *len_w = u3r_met(3, *q_octs); + + if (c3y == u3a_is_cat(*q_octs)) { + *buf_y = (c3_y*)q_octs; + } + else { + u3a_atom* ptr_a = u3a_to_ptr(*q_octs); + *buf_y = (c3_y*)ptr_a->buf_w; + } + + *lead_w = 0; + + if (p_octs_w > *len_w) { + *lead_w = p_octs_w - *len_w; + } + else { + *len_w = p_octs_w; + } + +} + +u3_noun _qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { + + c3_w p_octs_w; + + if (c3n == u3r_safe_word(p_octs, &p_octs_w)) { + return u3m_bail(c3__exit); + } + + if (p_octs_w == 0) { + return u3_nul; + } + + c3_w len_w, lead_w; + c3_y* buf_y; + + _x_octs_buffer(&q_octs, p_octs_w, &buf_y, &len_w, &lead_w); + + buf_y += len_w; + + u3_noun rip = u3_nul; + + while (lead_w--) { + rip = u3nc(u3i_word(0x0), rip); + } + + while (len_w--) { + rip = u3nc(*(--buf_y), rip); + } + + return rip; +} +u3_noun +u3we_bytestream_rip_octs(u3_noun cor){ + + u3_noun sam = u3x_at(u3x_sam, cor); + + u3_atom p_octs, q_octs; + _x_octs(sam, &p_octs, &q_octs); + + return _qe_bytestream_rip_octs(p_octs, q_octs); + +} + +u3_noun +_qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { + + u3_atom p_octs_a, p_octs_b; + u3_atom q_octs_a, q_octs_b; + + _x_octs(octs_a, &p_octs_a, &q_octs_a); + _x_octs(octs_b, &p_octs_b, &q_octs_b); + + c3_w p_octs_a_w, p_octs_b_w; + + if ( c3n == u3r_safe_word(p_octs_a, &p_octs_a_w) || + c3n == u3r_safe_word(p_octs_b, &p_octs_b_w)) { + u3m_bail(c3__exit); + } + + if (p_octs_a_w == 0) { + return octs_b; + } + if (p_octs_b_w == 0) { + return octs_a; + } + + c3_w len_w, lem_w; + c3_w lead_w, leaf_w; + + c3_y* sea_y; + c3_y* seb_y; + + _x_octs_buffer(&q_octs_a, p_octs_a_w, &sea_y, &len_w, &lead_w); + _x_octs_buffer(&q_octs_b, p_octs_b_w, &seb_y, &lem_w, &leaf_w); + + // XX In the far distant future, in a land far away, + // this might actually lead to integer overflow + // + c3_d p_octs_d = p_octs_a_w + lem_w; + + u3i_slab sab_u; + + u3i_slab_bare(&sab_u, 3, p_octs_d); + sab_u.buf_w[sab_u.len_w - 1] = 0; + + memcpy(sab_u.buf_y, sea_y, len_w); + memset(sab_u.buf_y + len_w, 0, lead_w); + memcpy(sab_u.buf_y + p_octs_a_w, seb_y, lem_w); + + u3_noun q_octs = u3i_slab_mint(&sab_u); + + return u3nc(u3i_chub(p_octs_d), q_octs); +} + +u3_noun +u3we_bytestream_cat_octs(u3_noun cor){ + + u3_noun octs_a, octs_b; + + u3x_mean(cor, u3x_sam_2, &octs_a, u3x_sam_3, &octs_b, 0); + + return _qe_bytestream_cat_octs(octs_a, octs_b); + +} diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index e40fe03846..2d924a996d 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2341,6 +2341,15 @@ static u3j_core _138_hex_blake_d[] = {} }; +static u3j_harm _138_hex_bytestream_rip_octs_a[] = {{".2", u3we_bytestream_rip_octs, c3y}, {}}; +static u3j_harm _138_hex_bytestream_cat_octs_a[] = {{".2", u3we_bytestream_cat_octs, c3y}, {}}; + +static u3j_core _138_hex_bytestream_d[] = + { {"rip-octs", 7, _138_hex_bytestream_rip_octs_a, 0, no_hashes }, + {"cat-octs", 7, _138_hex_bytestream_cat_octs_a, 0, no_hashes }, + {} + }; + static u3j_core _138_hex_d[] = { { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 0, no_hashes }, @@ -2359,6 +2368,7 @@ static u3j_core _138_hex_d[] = { "secp", 6, 0, _140_hex_secp_d, no_hashes }, { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, + { "bytestream", 3, 0, _138_hex_bytestream_d, no_hashes}, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 34716971bd..5ddb79e60d 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -291,6 +291,9 @@ u3_noun u3wes_gte(u3_noun); u3_noun u3wes_gth(u3_noun); + u3_noun u3we_bytestream_rip_octs(u3_noun); + u3_noun u3we_bytestream_cat_octs(u3_noun); + /** Tier 6. **/ u3_noun u3wf_bull(u3_noun); From 32ad08017bc81086c2f53cc77c7396b2f5e99f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Tue, 30 Jul 2024 12:17:36 +0800 Subject: [PATCH 2/8] bytestream jets: implement +can-octs --- pkg/noun/jets/e/bytestream.c | 187 ++++++++++++++++++++++++++++------- pkg/noun/jets/tree.c | 2 + pkg/noun/jets/w.h | 1 + 3 files changed, 153 insertions(+), 37 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 0b8855a609..61f2670e05 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -2,6 +2,7 @@ #include #include #include +#include static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { @@ -16,9 +17,16 @@ static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { u3m_bail(c3__exit); } } -static void _x_octs_buffer(u3_atom *q_octs, c3_w p_octs_w, c3_y** buf_y, +static void _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, + c3_w* p_octs_w, c3_y** buf_y, c3_w* len_w, c3_w* lead_w) { + // XX gracefully handle p.octs exceeding a word + // + if (c3n == u3r_safe_word(*p_octs, p_octs_w)) { + u3m_bail(c3__exit); + } + *len_w = u3r_met(3, *q_octs); if (c3y == u3a_is_cat(*q_octs)) { @@ -31,32 +39,26 @@ static void _x_octs_buffer(u3_atom *q_octs, c3_w p_octs_w, c3_y** buf_y, *lead_w = 0; - if (p_octs_w > *len_w) { - *lead_w = p_octs_w - *len_w; + if (*p_octs_w > *len_w) { + *lead_w = *p_octs_w - *len_w; } else { - *len_w = p_octs_w; + *len_w = *p_octs_w; } } u3_noun _qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { - c3_w p_octs_w; + c3_w p_octs_w, len_w, lead_w; + c3_y* buf_y; - if (c3n == u3r_safe_word(p_octs, &p_octs_w)) { - return u3m_bail(c3__exit); - } + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &buf_y, &len_w, &lead_w); if (p_octs_w == 0) { return u3_nul; } - c3_w len_w, lead_w; - c3_y* buf_y; - - _x_octs_buffer(&q_octs, p_octs_w, &buf_y, &len_w, &lead_w); - buf_y += len_w; u3_noun rip = u3_nul; @@ -92,50 +94,42 @@ _qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { _x_octs(octs_a, &p_octs_a, &q_octs_a); _x_octs(octs_b, &p_octs_b, &q_octs_b); - c3_w p_octs_a_w, p_octs_b_w; - - if ( c3n == u3r_safe_word(p_octs_a, &p_octs_a_w) || - c3n == u3r_safe_word(p_octs_b, &p_octs_b_w)) { - u3m_bail(c3__exit); - } - - if (p_octs_a_w == 0) { - return octs_b; - } - if (p_octs_b_w == 0) { - return octs_a; - } - + c3_w p_octs_a_w, p_octs_b_w; c3_w len_w, lem_w; c3_w lead_w, leaf_w; c3_y* sea_y; c3_y* seb_y; - _x_octs_buffer(&q_octs_a, p_octs_a_w, &sea_y, &len_w, &lead_w); - _x_octs_buffer(&q_octs_b, p_octs_b_w, &seb_y, &lem_w, &leaf_w); + _x_octs_buffer(&p_octs_a, &q_octs_a, &p_octs_a_w, &sea_y, &len_w, &lead_w); + _x_octs_buffer(&p_octs_b, &q_octs_b, &p_octs_b_w, &seb_y, &lem_w, &leaf_w); - // XX In the far distant future, in a land far away, - // this might actually lead to integer overflow - // - c3_d p_octs_d = p_octs_a_w + lem_w; + if (p_octs_a_w == 0) { + return u3k(octs_b); + } + if (p_octs_b_w == 0) { + return u3k(octs_a); + } + + c3_d p_octs_d = p_octs_a_w + p_octs_b_w; u3i_slab sab_u; - u3i_slab_bare(&sab_u, 3, p_octs_d); + u3i_slab_init(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); sab_u.buf_w[sab_u.len_w - 1] = 0; memcpy(sab_u.buf_y, sea_y, len_w); memset(sab_u.buf_y + len_w, 0, lead_w); memcpy(sab_u.buf_y + p_octs_a_w, seb_y, lem_w); - u3_noun q_octs = u3i_slab_mint(&sab_u); + u3_noun q_octs = u3i_slab_moot(&sab_u); + u3_noun ret = u3nc(u3i_chub(p_octs_d), q_octs); - return u3nc(u3i_chub(p_octs_d), q_octs); + return ret; } u3_noun -u3we_bytestream_cat_octs(u3_noun cor){ +u3we_bytestream_cat_octs(u3_noun cor) { u3_noun octs_a, octs_b; @@ -144,3 +138,122 @@ u3we_bytestream_cat_octs(u3_noun cor){ return _qe_bytestream_cat_octs(octs_a, octs_b); } + +u3_noun +_qe_bytestream_can_octs(u3_noun octs_list) { + + if (u3_nul == octs_list) { + return u3nc(0, 0); + } + + if (u3_nul == u3t(octs_list)) { + u3k(u3h(octs_list)); + } + + /* We can octs in two steps: + * first loop iteration computes the total required + * buffer size in bytes, factoring in the leading bytes + * of the final octs. The second loop iterates over each octs, + * copying the data to the output buffer. + */ + + // Compute total size + // + c3_d tot_d = 0; + + u3_noun octs_list_start = octs_list; + u3_noun octs = u3_none; + + while (octs_list != u3_nul) { + + octs = u3h(octs_list); + + if (c3n == u3a_is_atom(u3h(octs)) || + c3n == u3a_is_atom(u3t(octs))) { + u3m_bail(c3__exit); + } + c3_w p_octs_w; + + if (c3n == u3r_safe_word(u3h(octs), &p_octs_w)) { + return u3_none; + } + // Check for overflow + // + if ( p_octs_w > (UINT64_MAX - tot_d)){ + u3m_bail(c3__exit); + } + tot_d += p_octs_w; + + octs_list = u3t(octs_list); + } + + + // Compute leading zeros of final octs -- the buffer + // size is decreased by this much. + // + // =leading-zeros (sub p.octs (met 3 q.octs)) + // + // p.octs fits into a word -- this has been verified + // in the loop above. + // + c3_w last_lead_w = (u3r_word(0, u3h(octs)) - u3r_met(3, u3t(octs))); + c3_d buf_len_w = tot_d - last_lead_w; + + u3i_slab sab_u; + u3i_slab_bare(&sab_u, 3, buf_len_w); + c3_y* buf_y = sab_u.buf_y; + + sab_u.buf_w[sab_u.len_w - 1] = 0; + + c3_y* sea_y; + u3_atom p_octs, q_octs; + c3_w p_octs_w, q_octs_w; + c3_w len_w, lead_w; + + // Bytes written so far + // + c3_d wit_d = 0; + + octs_list = octs_list_start; + + while (octs_list != u3_nul) { + + octs = u3h(octs_list); + + _x_octs(octs, &p_octs, &q_octs); + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + if (p_octs_w == 0) { + octs_list = u3t(octs_list); + continue; + } + + memcpy(buf_y, sea_y, len_w); + buf_y += len_w; + wit_d += len_w; + + // More bytes to follow, write leading zeros + // + if (wit_d < buf_len_w) { + memset(buf_y, 0, lead_w); + buf_y += lead_w; + wit_d += lead_w; + } + + octs_list = u3t(octs_list); + } + + u3_assert((buf_y - sab_u.buf_y) == buf_len_w); + + return u3nc(u3i_chub(tot_d), u3i_slab_mint(&sab_u)); +} + +u3_noun +u3we_bytestream_can_octs(u3_noun cor) { + + u3_noun octs_list; + + u3x_mean(cor, u3x_sam_1, &octs_list, 0); + + return _qe_bytestream_can_octs(octs_list); +} diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 2d924a996d..7134e9e0cd 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2343,10 +2343,12 @@ static u3j_core _138_hex_blake_d[] = static u3j_harm _138_hex_bytestream_rip_octs_a[] = {{".2", u3we_bytestream_rip_octs, c3y}, {}}; static u3j_harm _138_hex_bytestream_cat_octs_a[] = {{".2", u3we_bytestream_cat_octs, c3y}, {}}; +static u3j_harm _138_hex_bytestream_can_octs_a[] = {{".2", u3we_bytestream_can_octs, c3y}, {}}; static u3j_core _138_hex_bytestream_d[] = { {"rip-octs", 7, _138_hex_bytestream_rip_octs_a, 0, no_hashes }, {"cat-octs", 7, _138_hex_bytestream_cat_octs_a, 0, no_hashes }, + {"can-octs", 7, _138_hex_bytestream_can_octs_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 5ddb79e60d..e218adf179 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -293,6 +293,7 @@ u3_noun u3we_bytestream_rip_octs(u3_noun); u3_noun u3we_bytestream_cat_octs(u3_noun); + u3_noun u3we_bytestream_can_octs(u3_noun); /** Tier 6. **/ From 5ce4545d7c2eafa18f03bcba160bfb864e2db220 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Thu, 8 Aug 2024 13:31:07 +0800 Subject: [PATCH 3/8] bytestream jets: jet for +chunk --- pkg/noun/jets/e/bytestream.c | 260 ++++++++++++++++++++++++++++++++++- pkg/noun/jets/tree.c | 17 ++- pkg/noun/jets/w.h | 7 + 3 files changed, 282 insertions(+), 2 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 61f2670e05..7ddc05ddad 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -3,6 +3,7 @@ #include #include #include +#include static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { @@ -249,7 +250,8 @@ _qe_bytestream_can_octs(u3_noun octs_list) { } u3_noun -u3we_bytestream_can_octs(u3_noun cor) { +u3we_bytestream_can_octs(u3_noun cor) +{ u3_noun octs_list; @@ -257,3 +259,259 @@ u3we_bytestream_can_octs(u3_noun cor) { return _qe_bytestream_can_octs(octs_list); } +u3_noun _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) +{ + c3_w pos_w; + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + while (pos_w < len_w) { + if (*(sea_y + pos_w) == '\n') { + break; + } + pos_w++; + } + // Newline not found, position at the end + if (*(sea_y + pos_w) != '\n') { + pos_w = p_octs; + } + else { + pos_w++; + } + + return u3nc(u3i_word(pos_w), u3k(octs)); +} +u3_noun u3we_bytestream_skip_line(u3_noun cor) +{ + + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &pos, u3x_sam_3, &octs, 0); + + return _qe_bytestream_skip_line(pos, octs); + +} +u3_noun _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) +{ + c3_w bat_w, pos_w; + + if (c3n == u3r_safe_word(bat, &bat_w) || bat_w > 0xff) { + return u3m_bail(c3__exit); + } + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + while (pos_w < len_w) { + + if (*(sea_y + pos_w) == bat_w) { + return u3nc(u3_nul, u3i_word(pos_w)); + } + + pos_w++; + } + // Here we are sure that: + // (1) bat_w has not been found + // (2) therefore pos_w == len_w + // + // If bat_w == 0, and there is still input + // in the stream, it means pos_w points at + // the first leading zero. + // + if (pos_w < p_octs && bat_w == 0) { + return u3nc(u3_nul, u3i_word(pos_w)); + } + + return u3_nul; +} +u3_noun u3we_bytestream_find_byte(u3_noun cor) +{ + u3_atom bat; + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &bat, + u3x_sam_6, &pos, + u3x_sam_7, &octs, 0); + + return _qe_bytestream_find_byte(bat, pos, octs); +} +u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) +{ + c3_w bat_w, pos_w; + + if (c3n == u3r_safe_word(bat, &bat_w) || bat_w > 0xff) { + return u3m_bail(c3__exit); + } + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + while (pos_w < len_w) { + + if (*(sea_y + pos_w) == bat_w) { + u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); + u3_noun new_octs = u3nc(u3i_word(pos_w), u3k(octs)); + return u3nc(idx, new_octs); + } + + pos_w++; + } + + // find leading zero: see comment in *_find_byte + // + if (pos_w < p_octs && bat_w == 0) { + u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); + u3_noun new_octs = u3nc(u3i_word(pos_w), u3k(octs)); + return u3nc(idx, new_octs); + } + + return u3nc(u3_nul, u3nc(u3k(pos), u3k(octs))); + +} +u3_noun u3we_bytestream_seek_byte(u3_noun cor) +{ + u3_atom bat; + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &bat, + u3x_sam_6, &pos, + u3x_sam_7, &octs, 0); + + return _qe_bytestream_seek_byte(bat, pos, octs); +} + +u3_noun _qe_peek_octs(c3_w n_w, c3_w pos_w, c3_w p_octs_w, c3_y* sea_y, + c3_w len_w) +{ + if (n_w == 0) { + return u3nc(0, 0); + } + + if (pos_w + n_w > p_octs_w) { + return u3m_bail(c3__exit); + } + + // Read leading zeros only + // + if (pos_w >= len_w) { + return u3nc(u3i_word(n_w), 0); + } + // Number of remaining buffer bytes + c3_w reb_w = len_w - pos_w; + + u3i_slab sab_u; + c3_w my_len_w; + + if (n_w < reb_w) { + my_len_w = n_w; + } + else { + my_len_w = reb_w; + } + u3i_slab_bare(&sab_u, 3, my_len_w); + sab_u.buf_w[sab_u.len_w - 1] = 0; + memcpy(sab_u.buf_y, sea_y + pos_w, my_len_w); + + return u3nc(u3i_word(n_w), u3i_slab_moot(&sab_u)); +} +u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) +{ + c3_w size_w, pos_w; + + if (c3n == u3r_safe_word(size, &size_w)) { + return u3m_bail(c3__exit); + } + + if (size_w == 0) { + return u3_nul; + } + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + u3_noun hun = u3_nul; + + while (pos_w < p_octs) { + // Remaining bytes + // + c3_w rem = (p_octs - pos_w); + + if (rem < size) { + u3_noun octs = _qe_peek_octs(rem, pos_w, p_octs_w, sea_y, + len_w); + hun = u3nc(octs, hun); + pos_w += rem; + } + else { + u3_noun octs = _qe_peek_octs(size, pos_w, p_octs_w, sea_y, + len_w); + hun = u3nc(octs, hun); + pos_w += size; + } + } + + return u3kb_flop(hun); +} + +u3_noun u3we_bytestream_chunk(u3_noun cor) +{ + u3_atom size; + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &size, + u3x_sam_6, &pos, + u3x_sam_7, &octs, 0); + + return _qe_bytestream_chunk(size, pos, octs); +} diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 7134e9e0cd..740fd5dbf1 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2341,14 +2341,29 @@ static u3j_core _138_hex_blake_d[] = {} }; +//+| %utilities static u3j_harm _138_hex_bytestream_rip_octs_a[] = {{".2", u3we_bytestream_rip_octs, c3y}, {}}; static u3j_harm _138_hex_bytestream_cat_octs_a[] = {{".2", u3we_bytestream_cat_octs, c3y}, {}}; static u3j_harm _138_hex_bytestream_can_octs_a[] = {{".2", u3we_bytestream_can_octs, c3y}, {}}; +//+| %navigation +static u3j_harm _138_hex_bytestream_skip_line_a[] = {{".2", u3we_bytestream_skip_line, c3y}, {}}; +static u3j_harm _138_hex_bytestream_find_byte_a[] = {{".2", u3we_bytestream_find_byte, c3y}, {}}; +static u3j_harm _138_hex_bytestream_seek_byte_a[] = {{".2", u3we_bytestream_seek_byte, c3y}, {}}; +//+| %transformation +static u3j_harm _138_hex_bytestream_chunk_a[] = {{".2", u3we_bytestream_chunk}, {}}; static u3j_core _138_hex_bytestream_d[] = - { {"rip-octs", 7, _138_hex_bytestream_rip_octs_a, 0, no_hashes }, + { + //+| %utilities + {"rip-octs", 7, _138_hex_bytestream_rip_octs_a, 0, no_hashes }, {"cat-octs", 7, _138_hex_bytestream_cat_octs_a, 0, no_hashes }, {"can-octs", 7, _138_hex_bytestream_can_octs_a, 0, no_hashes }, + //+| %navigation + {"skip-line", 7, _138_hex_bytestream_skip_line_a, 0, no_hashes }, + {"find-byte", 7, _138_hex_bytestream_find_byte_a, 0, no_hashes }, + {"seek-byte", 7, _138_hex_bytestream_seek_byte_a, 0, no_hashes }, + //+| %transformation + {"chunk", 7, _138_hex_bytestream_chunk_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index e218adf179..c6ab23e666 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -291,9 +291,16 @@ u3_noun u3wes_gte(u3_noun); u3_noun u3wes_gth(u3_noun); + //+| %utilities u3_noun u3we_bytestream_rip_octs(u3_noun); u3_noun u3we_bytestream_cat_octs(u3_noun); u3_noun u3we_bytestream_can_octs(u3_noun); + //+| %navigation + u3_noun u3we_bytestream_skip_line(u3_noun); + u3_noun u3we_bytestream_find_byte(u3_noun); + u3_noun u3we_bytestream_seek_byte(u3_noun); + //+| %transformation + u3_noun u3we_bytestream_chunk(u3_noun); /** Tier 6. **/ From 10120da872bfe95872915ac04a39f18137a1a715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Thu, 8 Aug 2024 15:09:02 +0800 Subject: [PATCH 4/8] bytestream jets: jet +extract, +fuse-extract --- pkg/noun/jets/e/bytestream.c | 190 ++++++++++++++++++++++++++++++++++- pkg/noun/jets/tree.c | 4 + pkg/noun/jets/w.h | 2 + 3 files changed, 194 insertions(+), 2 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 7ddc05ddad..ba77a171d8 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -1,9 +1,10 @@ #include #include +#include +#include #include #include #include -#include static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { @@ -148,7 +149,7 @@ _qe_bytestream_can_octs(u3_noun octs_list) { } if (u3_nul == u3t(octs_list)) { - u3k(u3h(octs_list)); + return u3k(u3h(octs_list)); } /* We can octs in two steps: @@ -515,3 +516,188 @@ u3_noun u3we_bytestream_chunk(u3_noun cor) return _qe_bytestream_chunk(size, pos, octs); } + +u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) +{ + u3_atom pos; + u3_noun octs; + + u3x_mean(sea, 2, &pos, 3, &octs, 0); + + c3_w pos_w; + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + u3_noun dal = u3_nul; + + u3_noun new_sea = u3_none; + + while (pos_w < p_octs_w) { + new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + u3_noun ext = u3x_good(u3n_slam_on(u3k(rac), new_sea)); + + u3_atom sip, ken; + c3_w sip_w, ken_w; + + u3x_mean(ext, 2, &sip, 3, &ken, 0); + + if (c3n == u3r_safe_word(sip, &sip_w)) { + // XX is u3z necessary here? + // does memory get freed on bail? + // + u3l_log("sip fail"); + u3z(dal); + u3z(ext); + u3m_bail(c3__exit); + } + if (c3n == u3r_safe_word(ken, &ken_w)) { + u3l_log("ken fail"); + u3z(dal); + u3z(ext); + u3m_bail(c3__exit); + } + + u3z(ext); + + if (sip_w == 0 && ken_w == 0) { + break; + } + + if (pos_w + sip_w > p_octs_w) { + u3z(dal); + return u3_none; + } + + pos_w += sip_w; + + if (ken_w == 0) { + continue; + } + + u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, sea_y, len_w); + pos_w += ken_w; + dal = u3nc(octs, dal); + } + + new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + + return u3nc(u3kb_flop(dal), new_sea); +} +u3_noun u3we_bytestream_extract(u3_noun cor) +{ + u3_noun sea; + u3_noun rac; + + u3x_mean(cor, u3x_sam_2, &sea, + u3x_sam_3, &rac, 0); + + return _qe_bytestream_extract(sea, rac); +} + +u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) +{ + u3_atom pos; + u3_noun octs; + + u3x_mean(sea, 2, &pos, 3, &octs, 0); + + c3_w pos_w; + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + u3_noun dal = u3_nul; + + u3_noun new_sea = u3_none; + + while (pos_w < p_octs_w) { + new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + u3_noun ext = u3x_good(u3n_slam_on(u3k(rac), new_sea)); + + u3_atom sip, ken; + c3_w sip_w, ken_w; + + u3x_mean(ext, 2, &sip, 3, &ken, 0); + + if (c3n == u3r_safe_word(sip, &sip_w)) { + // XX is u3z necessary here? + // does memory get freed on bail? + // + u3l_log("sip fail"); + u3z(dal); + u3z(ext); + u3m_bail(c3__exit); + } + if (c3n == u3r_safe_word(ken, &ken_w)) { + u3l_log("ken fail"); + u3z(dal); + u3z(ext); + u3m_bail(c3__exit); + } + + u3z(ext); + + if (sip_w == 0 && ken_w == 0) { + break; + } + + if (pos_w + sip_w > p_octs_w) { + u3z(dal); + return u3_none; + } + + pos_w += sip_w; + + if (ken_w == 0) { + continue; + } + + u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, sea_y, len_w); + pos_w += ken_w; + dal = u3nc(octs, dal); + } + + u3_noun lad = u3kb_flop(dal); + u3_noun data = _qe_bytestream_can_octs(lad); + u3z(lad); + + new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + + return u3nc(data, new_sea); +} + +u3_noun u3we_bytestream_fuse_extract(u3_noun cor) +{ + u3_noun sea; + u3_noun rac; + + u3x_mean(cor, u3x_sam_2, &sea, + u3x_sam_3, &rac, 0); + + return _qe_bytestream_fuse_extract(sea, rac); +} diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 740fd5dbf1..e6d87ba385 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2351,6 +2351,8 @@ static u3j_harm _138_hex_bytestream_find_byte_a[] = {{".2", u3we_bytestream_find static u3j_harm _138_hex_bytestream_seek_byte_a[] = {{".2", u3we_bytestream_seek_byte, c3y}, {}}; //+| %transformation static u3j_harm _138_hex_bytestream_chunk_a[] = {{".2", u3we_bytestream_chunk}, {}}; +static u3j_harm _138_hex_bytestream_extract_a[] = {{".2", u3we_bytestream_extract}, {}}; +static u3j_harm _138_hex_bytestream_fuse_extract_a[] = {{".2", u3we_bytestream_fuse_extract}, {}}; static u3j_core _138_hex_bytestream_d[] = { @@ -2364,6 +2366,8 @@ static u3j_core _138_hex_bytestream_d[] = {"seek-byte", 7, _138_hex_bytestream_seek_byte_a, 0, no_hashes }, //+| %transformation {"chunk", 7, _138_hex_bytestream_chunk_a, 0, no_hashes }, + {"extract", 7, _138_hex_bytestream_extract_a, 0, no_hashes }, + {"fuse-extract", 7, _138_hex_bytestream_fuse_extract_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index c6ab23e666..64a5a3d33c 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -301,6 +301,8 @@ u3_noun u3we_bytestream_seek_byte(u3_noun); //+| %transformation u3_noun u3we_bytestream_chunk(u3_noun); + u3_noun u3we_bytestream_extract(u3_noun); + u3_noun u3we_bytestream_fuse_extract(u3_noun); /** Tier 6. **/ From 19cb641f7a1ada8504b299e21c091bc19617e4a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Thu, 8 Aug 2024 20:06:33 +0800 Subject: [PATCH 5/8] bytestream: fix 0-slab bug, leading zeros bug in +cat-octs, +can-octs --- pkg/noun/jets/e/bytestream.c | 77 +++++++++++++++++++++++++++++++----- pkg/noun/jets/tree.c | 18 ++++++++- pkg/noun/jets/w.h | 8 ++++ 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index ba77a171d8..ec3a2f2e82 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -115,18 +115,26 @@ _qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { c3_d p_octs_d = p_octs_a_w + p_octs_b_w; - u3i_slab sab_u; + u3_noun ret = u3_none; - u3i_slab_init(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); - sab_u.buf_w[sab_u.len_w - 1] = 0; + // Both a and b are 0. + // + if (len_w + lem_w == 0) { + ret = u3nc(u3i_chub(p_octs_d), u3i_word(0)); + } + else { + u3i_slab sab_u; - memcpy(sab_u.buf_y, sea_y, len_w); - memset(sab_u.buf_y + len_w, 0, lead_w); - memcpy(sab_u.buf_y + p_octs_a_w, seb_y, lem_w); + u3i_slab_init(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); + sab_u.buf_w[sab_u.len_w - 1] = 0; - u3_noun q_octs = u3i_slab_moot(&sab_u); - u3_noun ret = u3nc(u3i_chub(p_octs_d), q_octs); + memcpy(sab_u.buf_y, sea_y, len_w); + memset(sab_u.buf_y + len_w, 0, lead_w); + memcpy(sab_u.buf_y + p_octs_a_w, seb_y, lem_w); + u3_noun q_octs = u3i_slab_moot(&sab_u); + ret = u3nc(u3i_chub(p_octs_d), q_octs); + } return ret; } @@ -165,6 +173,8 @@ _qe_bytestream_can_octs(u3_noun octs_list) { u3_noun octs_list_start = octs_list; u3_noun octs = u3_none; + // Last non-zero octs + u3_noun last_octs = u3_none; while (octs_list != u3_nul) { @@ -189,8 +199,7 @@ _qe_bytestream_can_octs(u3_noun octs_list) { octs_list = u3t(octs_list); } - - // Compute leading zeros of final octs -- the buffer + // Compute leading zeros of last non-zero octs -- the buffer // size is decreased by this much. // // =leading-zeros (sub p.octs (met 3 q.octs)) @@ -198,9 +207,17 @@ _qe_bytestream_can_octs(u3_noun octs_list) { // p.octs fits into a word -- this has been verified // in the loop above. // + // The resulting buf_len_w is correct only if the last + // octs is non-zero: but at the return u3i_slab_mint + // takes care of trimming. + // c3_w last_lead_w = (u3r_word(0, u3h(octs)) - u3r_met(3, u3t(octs))); c3_d buf_len_w = tot_d - last_lead_w; + if (buf_len_w == 0) { + return u3nc(u3i_word(tot_d), 0); + } + u3i_slab sab_u; u3i_slab_bare(&sab_u, 3, buf_len_w); c3_y* buf_y = sab_u.buf_y; @@ -701,3 +718,43 @@ u3_noun u3we_bytestream_fuse_extract(u3_noun cor) return _qe_bytestream_fuse_extract(sea, rac); } + +u3_noun _qe_bytestream_need_bits(u3_atom n, u3_noun bits) +{ + return u3_none; +} +u3_noun u3we_bytestream_need_bits(u3_noun cor) +{ + + u3_noun n; + u3_noun bits; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_3, &bits, 0); + + return _qe_bytestream_need_bits(n, bits); +} +u3_noun u3we_bytestream_drop_bits(u3_noun cor) +{ + return u3_none; +} +u3_noun u3we_bytestream_skip_bits(u3_noun cor) +{ + return u3_none; +} +u3_noun u3we_bytestream_peek_bits(u3_noun cor) +{ + return u3_none; +} +u3_noun u3we_bytestream_read_bits(u3_noun cor) +{ + return u3_none; +} +u3_noun u3we_bytestream_read_need_bits(u3_noun cor) +{ + return u3_none; +} +u3_noun u3we_bytestream_byte_bits(u3_noun cor) +{ + return u3_none; +} diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index e6d87ba385..55f7042d70 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2353,6 +2353,14 @@ static u3j_harm _138_hex_bytestream_seek_byte_a[] = {{".2", u3we_bytestream_seek static u3j_harm _138_hex_bytestream_chunk_a[] = {{".2", u3we_bytestream_chunk}, {}}; static u3j_harm _138_hex_bytestream_extract_a[] = {{".2", u3we_bytestream_extract}, {}}; static u3j_harm _138_hex_bytestream_fuse_extract_a[] = {{".2", u3we_bytestream_fuse_extract}, {}}; +//+| %bitstream +static u3j_harm _138_hex_bytestream_need_bits_a[] = {{".2", u3we_bytestream_need_bits}, {}}; +static u3j_harm _138_hex_bytestream_drop_bits_a[] = {{".2", u3we_bytestream_drop_bits}, {}}; +static u3j_harm _138_hex_bytestream_skip_bits_a[] = {{".2", u3we_bytestream_skip_bits}, {}}; +static u3j_harm _138_hex_bytestream_peek_bits_a[] = {{".2", u3we_bytestream_peek_bits}, {}}; +static u3j_harm _138_hex_bytestream_read_bits_a[] = {{".2", u3we_bytestream_read_bits}, {}}; +static u3j_harm _138_hex_bytestream_read_need_bits_a[] = {{".2", u3we_bytestream_read_need_bits}, {}}; +static u3j_harm _138_hex_bytestream_byte_bits_a[] = {{".2", u3we_bytestream_byte_bits}, {}}; static u3j_core _138_hex_bytestream_d[] = { @@ -2368,7 +2376,15 @@ static u3j_core _138_hex_bytestream_d[] = {"chunk", 7, _138_hex_bytestream_chunk_a, 0, no_hashes }, {"extract", 7, _138_hex_bytestream_extract_a, 0, no_hashes }, {"fuse-extract", 7, _138_hex_bytestream_fuse_extract_a, 0, no_hashes }, - {} + //+| %bitstream + {"need-bits", 7, _138_hex_bytestream_need_bits_a, 0, no_hashes }, + {"drop-bits", 7, _138_hex_bytestream_drop_bits_a, 0, no_hashes }, + {"skip-bits", 7, _138_hex_bytestream_skip_bits_a, 0, no_hashes }, + {"peek-bits", 7, _138_hex_bytestream_peek_bits_a, 0, no_hashes }, + {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, + {"read-need-bits", 7, _138_hex_bytestream_read_need_bits_a, 0, no_hashes }, + {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, + }; static u3j_core _138_hex_d[] = diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 64a5a3d33c..7813e456e2 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -303,6 +303,14 @@ u3_noun u3we_bytestream_chunk(u3_noun); u3_noun u3we_bytestream_extract(u3_noun); u3_noun u3we_bytestream_fuse_extract(u3_noun); + //+| %bitstream + u3_noun u3we_bytestream_need_bits(u3_noun); + u3_noun u3we_bytestream_drop_bits(u3_noun); + u3_noun u3we_bytestream_skip_bits(u3_noun); + u3_noun u3we_bytestream_peek_bits(u3_noun); + u3_noun u3we_bytestream_read_bits(u3_noun); + u3_noun u3we_bytestream_read_need_bits(u3_noun); + u3_noun u3we_bytestream_byte_bits(u3_noun); /** Tier 6. **/ From da9c086275e8e82fa1cd2fc96036e5a3b55dfc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Thu, 8 Aug 2024 20:11:41 +0800 Subject: [PATCH 6/8] bytestream: do not enable bitstream jets --- pkg/noun/jets/e/bytestream.c | 18 ++++++------------ pkg/noun/jets/tree.c | 14 +++++++------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index ec3a2f2e82..b8adc62036 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -719,20 +719,14 @@ u3_noun u3we_bytestream_fuse_extract(u3_noun cor) return _qe_bytestream_fuse_extract(sea, rac); } -u3_noun _qe_bytestream_need_bits(u3_atom n, u3_noun bits) -{ - return u3_none; -} +// +$ bits $+ bits +// $: num=@ud +// bit=@ub +// =bays +// == u3_noun u3we_bytestream_need_bits(u3_noun cor) { - - u3_noun n; - u3_noun bits; - - u3x_mean(cor, u3x_sam_2, &n, - u3x_sam_3, &bits, 0); - - return _qe_bytestream_need_bits(n, bits); + return u3_none; } u3_noun u3we_bytestream_drop_bits(u3_noun cor) { diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 55f7042d70..40a7fcccc1 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2377,13 +2377,13 @@ static u3j_core _138_hex_bytestream_d[] = {"extract", 7, _138_hex_bytestream_extract_a, 0, no_hashes }, {"fuse-extract", 7, _138_hex_bytestream_fuse_extract_a, 0, no_hashes }, //+| %bitstream - {"need-bits", 7, _138_hex_bytestream_need_bits_a, 0, no_hashes }, - {"drop-bits", 7, _138_hex_bytestream_drop_bits_a, 0, no_hashes }, - {"skip-bits", 7, _138_hex_bytestream_skip_bits_a, 0, no_hashes }, - {"peek-bits", 7, _138_hex_bytestream_peek_bits_a, 0, no_hashes }, - {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, - {"read-need-bits", 7, _138_hex_bytestream_read_need_bits_a, 0, no_hashes }, - {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, + // {"need-bits", 7, _138_hex_bytestream_need_bits_a, 0, no_hashes }, + // {"drop-bits", 7, _138_hex_bytestream_drop_bits_a, 0, no_hashes }, + // {"skip-bits", 7, _138_hex_bytestream_skip_bits_a, 0, no_hashes }, + // {"peek-bits", 7, _138_hex_bytestream_peek_bits_a, 0, no_hashes }, + // {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, + // {"read-need-bits", 7, _138_hex_bytestream_read_need_bits_a, 0, no_hashes }, + // {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, }; From b25e1d3f5265f5e528b8c76b200b256d20813d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Fri, 9 Aug 2024 16:27:45 +0800 Subject: [PATCH 7/8] bytestream: jet bitstream --- pkg/noun/jets/e/bytestream.c | 477 ++++++++++++++++++++++++++++++++--- pkg/noun/jets/tree.c | 22 +- pkg/noun/jets/w.h | 8 +- 3 files changed, 462 insertions(+), 45 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index b8adc62036..6f732622e2 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -6,6 +6,10 @@ #include #include +// XX formatting: function return singature should break the line +// XX do not crash on indirect atoms, but default to Hoon +// XX use u3i_word to imprison all indirect atoms +// static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { if (c3n == u3r_mean(octs, @@ -19,7 +23,7 @@ static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { u3m_bail(c3__exit); } } -static void _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, +static void _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, c3_w* p_octs_w, c3_y** buf_y, c3_w* len_w, c3_w* lead_w) { @@ -204,11 +208,11 @@ _qe_bytestream_can_octs(u3_noun octs_list) { // // =leading-zeros (sub p.octs (met 3 q.octs)) // - // p.octs fits into a word -- this has been verified + // p.octs fits into a word -- this has been verified // in the loop above. // - // The resulting buf_len_w is correct only if the last - // octs is non-zero: but at the return u3i_slab_mint + // The resulting buf_len_w is correct only if the last + // octs is non-zero: but at the return u3i_slab_mint // takes care of trimming. // c3_w last_lead_w = (u3r_word(0, u3h(octs)) - u3r_met(3, u3t(octs))); @@ -314,12 +318,12 @@ u3_noun _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) } u3_noun u3we_bytestream_skip_line(u3_noun cor) { - + u3_atom pos; u3_noun octs; u3x_mean(cor, u3x_sam_2, &pos, u3x_sam_3, &octs, 0); - + return _qe_bytestream_skip_line(pos, octs); } @@ -356,8 +360,8 @@ u3_noun _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) // Here we are sure that: // (1) bat_w has not been found // (2) therefore pos_w == len_w - // - // If bat_w == 0, and there is still input + // + // If bat_w == 0, and there is still input // in the stream, it means pos_w points at // the first leading zero. // @@ -373,10 +377,10 @@ u3_noun u3we_bytestream_find_byte(u3_noun cor) u3_atom pos; u3_noun octs; - u3x_mean(cor, u3x_sam_2, &bat, - u3x_sam_6, &pos, + u3x_mean(cor, u3x_sam_2, &bat, + u3x_sam_6, &pos, u3x_sam_7, &octs, 0); - + return _qe_bytestream_find_byte(bat, pos, octs); } u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) @@ -405,8 +409,8 @@ u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) if (*(sea_y + pos_w) == bat_w) { u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); - u3_noun new_octs = u3nc(u3i_word(pos_w), u3k(octs)); - return u3nc(idx, new_octs); + u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + return u3nc(idx, new_bays); } pos_w++; @@ -416,8 +420,8 @@ u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) // if (pos_w < p_octs && bat_w == 0) { u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); - u3_noun new_octs = u3nc(u3i_word(pos_w), u3k(octs)); - return u3nc(idx, new_octs); + u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + return u3nc(idx, new_bays); } return u3nc(u3_nul, u3nc(u3k(pos), u3k(octs))); @@ -429,14 +433,148 @@ u3_noun u3we_bytestream_seek_byte(u3_noun cor) u3_atom pos; u3_noun octs; - u3x_mean(cor, u3x_sam_2, &bat, - u3x_sam_6, &pos, + u3x_mean(cor, u3x_sam_2, &bat, + u3x_sam_6, &pos, u3x_sam_7, &octs, 0); - + return _qe_bytestream_seek_byte(bat, pos, octs); } -u3_noun _qe_peek_octs(c3_w n_w, c3_w pos_w, c3_w p_octs_w, c3_y* sea_y, +u3_noun +_qe_bytestream_read_byte(u3_atom pos, u3_noun octs) +{ + c3_w pos_w; + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + if (pos_w + 1 > p_octs_w) { + u3m_bail(c3__exit); + } + + c3_y bat_y; + + if (pos_w >= len_w) { + bat_y = 0; + } + else { + bat_y = *(sea_y + pos_w); + } + + u3_noun new_bays = u3nc(u3i_word(pos_w + 1), u3k(octs)); + + return u3nc(bat_y, new_bays); +} + +u3_noun +u3we_bytestream_read_byte(u3_noun cor) +{ + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &pos, + u3x_sam_3, &octs, 0); + + return _qe_bytestream_read_byte(pos, octs); +} + +u3_noun +_qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) +{ + c3_w n_w, pos_w; + + if (c3n == u3r_safe_word(n, &n_w)) { + return u3m_bail(c3__exit); + } + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3m_bail(c3__exit); + } + + if (n_w == 0) { + return u3nc(u3nc(0,0), u3nc(u3k(pos), u3k(octs))); + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + if (pos_w + n_w > p_octs_w) { + u3m_bail(c3__exit); + } + + // Number of bytes to read, excluding leading zeros + // + c3_w red_w = n_w; + + if (pos_w + n_w > len_w) { + if (pos_w < len_w) { + red_w = len_w - pos_w; + } + else { + red_w = 0; + } + } + + u3_noun read_octs; + + if (red_w == 0) { + read_octs = u3nc(u3i_word(n_w), 0); + } + else { + u3i_slab sab_u; + u3i_slab_bare(&sab_u, 3, n_w); + sab_u.buf_w[sab_u.len_w - 1] = 0; + + memcpy(sab_u.buf_y, sea_y + pos_w, red_w); + + if (red_w < n_w) { + memset(sab_u.buf_y + red_w, 0, (n_w - red_w)); + } + + read_octs = u3nc(u3i_word(n_w), u3i_slab_moot(&sab_u)); + } + + u3_noun new_bays = u3nc(u3i_word(pos_w + n_w), u3k(octs)); + + return u3nc(read_octs, new_bays); +} + +u3_noun +u3we_bytestream_read_octs(u3_noun cor) +{ + u3_atom n; + u3_atom pos; + u3_noun octs; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_6, &pos, + u3x_sam_7, &octs, 0); + + return _qe_bytestream_read_octs(n, pos, octs); +} + + +u3_noun _qe_peek_octs(c3_w n_w, c3_w pos_w, c3_w p_octs_w, c3_y* sea_y, c3_w len_w) { if (n_w == 0) { @@ -511,7 +649,7 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) pos_w += rem; } else { - u3_noun octs = _qe_peek_octs(size, pos_w, p_octs_w, sea_y, + u3_noun octs = _qe_peek_octs(size, pos_w, p_octs_w, sea_y, len_w); hun = u3nc(octs, hun); pos_w += size; @@ -527,10 +665,10 @@ u3_noun u3we_bytestream_chunk(u3_noun cor) u3_atom pos; u3_noun octs; - u3x_mean(cor, u3x_sam_2, &size, - u3x_sam_6, &pos, + u3x_mean(cor, u3x_sam_2, &size, + u3x_sam_6, &pos, u3x_sam_7, &octs, 0); - + return _qe_bytestream_chunk(size, pos, octs); } @@ -618,9 +756,9 @@ u3_noun u3we_bytestream_extract(u3_noun cor) u3_noun sea; u3_noun rac; - u3x_mean(cor, u3x_sam_2, &sea, + u3x_mean(cor, u3x_sam_2, &sea, u3x_sam_3, &rac, 0); - + return _qe_bytestream_extract(sea, rac); } @@ -713,12 +851,94 @@ u3_noun u3we_bytestream_fuse_extract(u3_noun cor) u3_noun sea; u3_noun rac; - u3x_mean(cor, u3x_sam_2, &sea, + u3x_mean(cor, u3x_sam_2, &sea, u3x_sam_3, &rac, 0); - + return _qe_bytestream_fuse_extract(sea, rac); } +u3_noun +_qe_bytestream_need_bits(u3_atom n, u3_noun bits) +{ + u3_atom num, bit; + u3_noun bays; + + u3x_mean(bits, 2, &num, + 6, &bit, + 7, &bays, 0); + + c3_w n_w, num_w; + c3_d bit_d; + + if (c3n == u3r_safe_word(n, &n_w)) { + return u3_none; + } + if (c3n == u3r_safe_word(num, &num_w)) { + return u3_none; + } + if (c3n == u3r_safe_chub(bit, &bit_d)) { + return u3_none; + } + + if (num_w >= n_w) { + return u3k(bits); + } + + // How many bytes to read + // + c3_w need_bits_w = n_w - num_w; + + // Requires indirect atom, drop to Hoon + // + if (need_bits_w > sizeof(bit_d)*8) { + return u3_none; + } + + c3_w need_bytes_w = need_bits_w / 8; + + if (need_bits_w % 8) { + need_bytes_w += 1; + } + + c3_w pos_w; + u3_atom pos; + u3_noun octs; + u3x_mean(bays, 2, &pos, 3, &octs, 0); + + if (c3n == u3r_safe_word(pos, &pos_w)) { + return u3_none; + } + + u3_atom p_octs, q_octs; + + _x_octs(octs, &p_octs, &q_octs); + + c3_w p_octs_w; + c3_w len_w, lead_w; + + c3_y* sea_y; + + _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + + if (pos_w + need_bytes_w > p_octs_w) { + u3m_bail(c3__exit); + } + + while (need_bytes_w--) { + + if (pos_w < len_w) { + bit_d += *(sea_y + pos_w) << num_w; + } + num_w += 8; + pos_w++; + + u3_assert(num_w <= 64); + } + + u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + + return u3nt(u3i_word(num_w), u3i_chub(bit_d), new_bays); +} // +$ bits $+ bits // $: num=@ud // bit=@ub @@ -726,29 +946,214 @@ u3_noun u3we_bytestream_fuse_extract(u3_noun cor) // == u3_noun u3we_bytestream_need_bits(u3_noun cor) { - return u3_none; + u3_atom n; + u3_noun bits; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_3, &bits, 0); + + return _qe_bytestream_need_bits(n, bits); +} +u3_noun +_qe_bytestream_drop_bits(u3_atom n, u3_noun bits) +{ + + u3_atom num, bit; + u3_noun bays; + + u3x_mean(bits, 2, &num, + 6, &bit, + 7, &bays, 0); + + c3_w n_w, num_w; + c3_d bit_d; + + if (c3n == u3r_safe_word(n, &n_w)) { + return u3_none; + } + if (c3n == u3r_safe_word(num, &num_w)) { + return u3_none; + } + if (c3n == u3r_safe_chub(bit, &bit_d)) { + return u3_none; + } + + c3_w dop_w = n_w; + + if (dop_w > num_w) { + dop_w = num_w; + } + + bit_d >>= dop_w; + num_w -= dop_w; + + return u3nt(u3i_word(num_w), u3i_chub(bit_d), u3k(bays)); } u3_noun u3we_bytestream_drop_bits(u3_noun cor) { - return u3_none; + u3_atom n; + u3_noun bits; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_3, &bits, 0); + + return _qe_bytestream_drop_bits(n, bits); } -u3_noun u3we_bytestream_skip_bits(u3_noun cor) + +u3_noun +_qe_bytestream_peek_bits(u3_atom n, u3_noun bits) { - return u3_none; + + u3_atom num, bit; + u3_noun bays; + + u3x_mean(bits, 2, &num, + 6, &bit, + 7, &bays, 0); + + c3_w n_w, num_w; + c3_d bit_d; + + if (c3n == u3r_safe_word(n, &n_w)) { + return u3_none; + } + if (c3n == u3r_safe_word(num, &num_w)) { + return u3_none; + } + if (c3n == u3r_safe_chub(bit, &bit_d)) { + return u3_none; + } + + if (n_w > num_w) { + u3m_bail(c3__exit); + } + + if (n_w > 64) { + return u3_none; + } + + if (n_w == 64) { + return u3i_chub(bit_d); + } + else { + c3_d mak_d = ((c3_d)1 << n_w) - 1; + + return u3i_chub(bit_d & mak_d); + } } u3_noun u3we_bytestream_peek_bits(u3_noun cor) { - return u3_none; + u3_atom n; + u3_noun bits; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_3, &bits, 0); + + return _qe_bytestream_peek_bits(n, bits); } + +u3_noun +_qe_bytestream_read_bits(u3_atom n, u3_noun bits) +{ + + u3_atom num, bit; + u3_noun bays; + + u3x_mean(bits, 2, &num, + 6, &bit, + 7, &bays, 0); + + c3_w n_w, num_w; + c3_d bit_d; + + if (c3n == u3r_safe_word(n, &n_w)) { + return u3_none; + } + if (c3n == u3r_safe_word(num, &num_w)) { + return u3_none; + } + if (c3n == u3r_safe_chub(bit, &bit_d)) { + return u3_none; + } + + if (n_w > num_w) { + u3m_bail(c3__exit); + } + + if (n_w > 64) { + return u3_none; + } + + c3_d bet_d = 0; + + if (n_w == 64) { + bet_d = bit_d; + } + else { + c3_d mak_d = ((c3_d)1 << n_w) - 1; + bet_d = bit_d & mak_d; + } + + c3_w dop_w = n_w; + + if (dop_w > num_w) { + dop_w = num_w; + } + + bit_d >>= dop_w; + num_w -= dop_w; + + u3_noun new_bits = u3nt(u3i_word(num_w), u3i_chub(bit_d), u3k(bays)); + + return u3nc(u3i_chub(bet_d), new_bits); +} + u3_noun u3we_bytestream_read_bits(u3_noun cor) { - return u3_none; + u3_atom n; + u3_noun bits; + + u3x_mean(cor, u3x_sam_2, &n, + u3x_sam_3, &bits, 0); + + return _qe_bytestream_read_bits(n, bits); } -u3_noun u3we_bytestream_read_need_bits(u3_noun cor) + +u3_noun +_qe_bytestream_byte_bits(u3_noun bits) { - return u3_none; + + u3_atom num, bit; + u3_noun bays; + + u3x_mean(bits, 2, &num, + 6, &bit, + 7, &bays, 0); + + c3_w num_w; + c3_d bit_d; + + if (c3n == u3r_safe_word(num, &num_w)) { + return u3_none; + } + if (c3n == u3r_safe_chub(bit, &bit_d)) { + return u3_none; + } + + c3_y rem_y = num_w & 0x7; + + u3_noun new_bits = u3nt(u3i_word(num_w - rem_y), + u3i_chub(bit_d >> rem_y), + u3k(bays)); + + return new_bits; } + u3_noun u3we_bytestream_byte_bits(u3_noun cor) { - return u3_none; + u3_noun bits; + + u3x_mean(cor, u3x_sam, &bits, 0); + + return _qe_bytestream_byte_bits(bits); } diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 40a7fcccc1..1b4d5e4ea8 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2345,6 +2345,10 @@ static u3j_core _138_hex_blake_d[] = static u3j_harm _138_hex_bytestream_rip_octs_a[] = {{".2", u3we_bytestream_rip_octs, c3y}, {}}; static u3j_harm _138_hex_bytestream_cat_octs_a[] = {{".2", u3we_bytestream_cat_octs, c3y}, {}}; static u3j_harm _138_hex_bytestream_can_octs_a[] = {{".2", u3we_bytestream_can_octs, c3y}, {}}; +//+| %read-byte +static u3j_harm _138_hex_bytestream_read_byte_a[] = {{".2", u3we_bytestream_read_byte, c3y}, {}}; +//+| %read-octs +static u3j_harm _138_hex_bytestream_read_octs_a[] = {{".2", u3we_bytestream_read_octs, c3y}, {}}; //+| %navigation static u3j_harm _138_hex_bytestream_skip_line_a[] = {{".2", u3we_bytestream_skip_line, c3y}, {}}; static u3j_harm _138_hex_bytestream_find_byte_a[] = {{".2", u3we_bytestream_find_byte, c3y}, {}}; @@ -2356,10 +2360,10 @@ static u3j_harm _138_hex_bytestream_fuse_extract_a[] = {{".2", u3we_bytestream_f //+| %bitstream static u3j_harm _138_hex_bytestream_need_bits_a[] = {{".2", u3we_bytestream_need_bits}, {}}; static u3j_harm _138_hex_bytestream_drop_bits_a[] = {{".2", u3we_bytestream_drop_bits}, {}}; -static u3j_harm _138_hex_bytestream_skip_bits_a[] = {{".2", u3we_bytestream_skip_bits}, {}}; +// static u3j_harm _138_hex_bytestream_skip_bits_a[] = {{".2", u3we_bytestream_skip_bits}, {}}; static u3j_harm _138_hex_bytestream_peek_bits_a[] = {{".2", u3we_bytestream_peek_bits}, {}}; static u3j_harm _138_hex_bytestream_read_bits_a[] = {{".2", u3we_bytestream_read_bits}, {}}; -static u3j_harm _138_hex_bytestream_read_need_bits_a[] = {{".2", u3we_bytestream_read_need_bits}, {}}; +// static u3j_harm _138_hex_bytestream_read_need_bits_a[] = {{".2", u3we_bytestream_read_need_bits}, {}}; static u3j_harm _138_hex_bytestream_byte_bits_a[] = {{".2", u3we_bytestream_byte_bits}, {}}; static u3j_core _138_hex_bytestream_d[] = @@ -2372,18 +2376,22 @@ static u3j_core _138_hex_bytestream_d[] = {"skip-line", 7, _138_hex_bytestream_skip_line_a, 0, no_hashes }, {"find-byte", 7, _138_hex_bytestream_find_byte_a, 0, no_hashes }, {"seek-byte", 7, _138_hex_bytestream_seek_byte_a, 0, no_hashes }, + //+| %read-byte + {"read-byte", 7, _138_hex_bytestream_read_byte_a, 0, no_hashes }, + //+| %read-octs + {"read-octs", 7, _138_hex_bytestream_read_octs_a, 0, no_hashes }, //+| %transformation {"chunk", 7, _138_hex_bytestream_chunk_a, 0, no_hashes }, {"extract", 7, _138_hex_bytestream_extract_a, 0, no_hashes }, {"fuse-extract", 7, _138_hex_bytestream_fuse_extract_a, 0, no_hashes }, //+| %bitstream - // {"need-bits", 7, _138_hex_bytestream_need_bits_a, 0, no_hashes }, - // {"drop-bits", 7, _138_hex_bytestream_drop_bits_a, 0, no_hashes }, + {"need-bits", 7, _138_hex_bytestream_need_bits_a, 0, no_hashes }, + {"drop-bits", 7, _138_hex_bytestream_drop_bits_a, 0, no_hashes }, // {"skip-bits", 7, _138_hex_bytestream_skip_bits_a, 0, no_hashes }, - // {"peek-bits", 7, _138_hex_bytestream_peek_bits_a, 0, no_hashes }, - // {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, + {"peek-bits", 7, _138_hex_bytestream_peek_bits_a, 0, no_hashes }, + {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, // {"read-need-bits", 7, _138_hex_bytestream_read_need_bits_a, 0, no_hashes }, - // {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, + {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 7813e456e2..0e249d9c09 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -299,6 +299,10 @@ u3_noun u3we_bytestream_skip_line(u3_noun); u3_noun u3we_bytestream_find_byte(u3_noun); u3_noun u3we_bytestream_seek_byte(u3_noun); + //+| %read-byte + u3_noun u3we_bytestream_read_byte(u3_noun); + //+| %read-octs + u3_noun u3we_bytestream_read_octs(u3_noun); //+| %transformation u3_noun u3we_bytestream_chunk(u3_noun); u3_noun u3we_bytestream_extract(u3_noun); @@ -306,10 +310,10 @@ //+| %bitstream u3_noun u3we_bytestream_need_bits(u3_noun); u3_noun u3we_bytestream_drop_bits(u3_noun); - u3_noun u3we_bytestream_skip_bits(u3_noun); + // u3_noun u3we_bytestream_skip_bits(u3_noun); u3_noun u3we_bytestream_peek_bits(u3_noun); u3_noun u3we_bytestream_read_bits(u3_noun); - u3_noun u3we_bytestream_read_need_bits(u3_noun); + // u3_noun u3we_bytestream_read_need_bits(u3_noun); u3_noun u3we_bytestream_byte_bits(u3_noun); /** Tier 6. From a09ff30cecd82dc3f14548a6ae4de58f01f63636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Paraniak?= Date: Fri, 16 Aug 2024 18:20:17 +0800 Subject: [PATCH 8/8] jets: fix bytestream jets --- pkg/noun/jets/e/bytestream.c | 254 +++++++++++++++++++++++------------ pkg/noun/jets/tree.c | 3 +- 2 files changed, 172 insertions(+), 85 deletions(-) diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 6f732622e2..71761e1f0f 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -6,11 +6,11 @@ #include #include -// XX formatting: function return singature should break the line // XX do not crash on indirect atoms, but default to Hoon // XX use u3i_word to imprison all indirect atoms // -static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { +static void +_x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { if (c3n == u3r_mean(octs, 2, p_octs, @@ -23,14 +23,13 @@ static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { u3m_bail(c3__exit); } } -static void _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, +static c3_o +_x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, c3_w* p_octs_w, c3_y** buf_y, c3_w* len_w, c3_w* lead_w) { - // XX gracefully handle p.octs exceeding a word - // if (c3n == u3r_safe_word(*p_octs, p_octs_w)) { - u3m_bail(c3__exit); + return c3n; } *len_w = u3r_met(3, *q_octs); @@ -52,33 +51,40 @@ static void _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, *len_w = *p_octs_w; } + return c3y; } -u3_noun _qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { +u3_noun +_qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { c3_w p_octs_w, len_w, lead_w; c3_y* buf_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &buf_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &buf_y, + &len_w, &lead_w)){ + return u3_none; + } if (p_octs_w == 0) { return u3_nul; } - buf_y += len_w; - u3_noun rip = u3_nul; while (lead_w--) { - rip = u3nc(u3i_word(0x0), rip); + rip = u3nc(0x0, rip); } + buf_y += len_w - 1; + while (len_w--) { - rip = u3nc(*(--buf_y), rip); + rip = u3nc(*(buf_y--), rip); } return rip; } + u3_noun u3we_bytestream_rip_octs(u3_noun cor){ @@ -107,29 +113,39 @@ _qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { c3_y* sea_y; c3_y* seb_y; - _x_octs_buffer(&p_octs_a, &q_octs_a, &p_octs_a_w, &sea_y, &len_w, &lead_w); - _x_octs_buffer(&p_octs_b, &q_octs_b, &p_octs_b_w, &seb_y, &lem_w, &leaf_w); + if (c3n == _x_octs_buffer(&p_octs_a, &q_octs_a, + &p_octs_a_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } + + if (c3n == _x_octs_buffer(&p_octs_b, &q_octs_b, + &p_octs_b_w, &seb_y, + &lem_w, &leaf_w)) { + return u3_none; + } if (p_octs_a_w == 0) { return u3k(octs_b); } + if (p_octs_b_w == 0) { return u3k(octs_a); } c3_d p_octs_d = p_octs_a_w + p_octs_b_w; - u3_noun ret = u3_none; + u3_noun ret; // Both a and b are 0. // - if (len_w + lem_w == 0) { + if (len_w == 0 && lem_w == 0) { ret = u3nc(u3i_chub(p_octs_d), u3i_word(0)); } else { u3i_slab sab_u; - u3i_slab_init(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); + u3i_slab_bare(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); sab_u.buf_w[sab_u.len_w - 1] = 0; memcpy(sab_u.buf_y, sea_y, len_w); @@ -191,12 +207,13 @@ _qe_bytestream_can_octs(u3_noun octs_list) { c3_w p_octs_w; if (c3n == u3r_safe_word(u3h(octs), &p_octs_w)) { + u3z(octs_list); return u3_none; } // Check for overflow // if ( p_octs_w > (UINT64_MAX - tot_d)){ - u3m_bail(c3__exit); + return u3_none; } tot_d += p_octs_w; @@ -244,7 +261,11 @@ _qe_bytestream_can_octs(u3_noun octs_list) { octs = u3h(octs_list); _x_octs(octs, &p_octs, &q_octs); - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)){ + return u3_none; + } if (p_octs_w == 0) { octs_list = u3t(octs_list); @@ -274,19 +295,19 @@ _qe_bytestream_can_octs(u3_noun octs_list) { u3_noun u3we_bytestream_can_octs(u3_noun cor) { - u3_noun octs_list; u3x_mean(cor, u3x_sam_1, &octs_list, 0); return _qe_bytestream_can_octs(octs_list); } -u3_noun _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) +u3_noun +_qe_bytestream_skip_line(u3_atom pos, u3_noun octs) { c3_w pos_w; if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -298,7 +319,11 @@ u3_noun _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } while (pos_w < len_w) { if (*(sea_y + pos_w) == '\n') { @@ -316,7 +341,8 @@ u3_noun _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) return u3nc(u3i_word(pos_w), u3k(octs)); } -u3_noun u3we_bytestream_skip_line(u3_noun cor) +u3_noun +u3we_bytestream_skip_line(u3_noun cor) { u3_atom pos; @@ -327,15 +353,16 @@ u3_noun u3we_bytestream_skip_line(u3_noun cor) return _qe_bytestream_skip_line(pos, octs); } -u3_noun _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) +u3_noun +_qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) { c3_w bat_w, pos_w; if (c3n == u3r_safe_word(bat, &bat_w) || bat_w > 0xff) { - return u3m_bail(c3__exit); + return u3_none; } if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -347,7 +374,11 @@ u3_noun _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } while (pos_w < len_w) { @@ -371,7 +402,8 @@ u3_noun _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) return u3_nul; } -u3_noun u3we_bytestream_find_byte(u3_noun cor) +u3_noun +u3we_bytestream_find_byte(u3_noun cor) { u3_atom bat; u3_atom pos; @@ -383,15 +415,16 @@ u3_noun u3we_bytestream_find_byte(u3_noun cor) return _qe_bytestream_find_byte(bat, pos, octs); } -u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) +u3_noun +_qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) { c3_w bat_w, pos_w; if (c3n == u3r_safe_word(bat, &bat_w) || bat_w > 0xff) { - return u3m_bail(c3__exit); + return u3_none; } if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -403,7 +436,11 @@ u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } while (pos_w < len_w) { @@ -427,7 +464,8 @@ u3_noun _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) return u3nc(u3_nul, u3nc(u3k(pos), u3k(octs))); } -u3_noun u3we_bytestream_seek_byte(u3_noun cor) +u3_noun +u3we_bytestream_seek_byte(u3_noun cor) { u3_atom bat; u3_atom pos; @@ -446,7 +484,7 @@ _qe_bytestream_read_byte(u3_atom pos, u3_noun octs) c3_w pos_w; if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -458,7 +496,11 @@ _qe_bytestream_read_byte(u3_atom pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } if (pos_w + 1 > p_octs_w) { u3m_bail(c3__exit); @@ -466,11 +508,11 @@ _qe_bytestream_read_byte(u3_atom pos, u3_noun octs) c3_y bat_y; - if (pos_w >= len_w) { - bat_y = 0; + if (pos_w < len_w) { + bat_y = *(sea_y + pos_w); } else { - bat_y = *(sea_y + pos_w); + bat_y = 0; } u3_noun new_bays = u3nc(u3i_word(pos_w + 1), u3k(octs)); @@ -496,11 +538,11 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) c3_w n_w, pos_w; if (c3n == u3r_safe_word(n, &n_w)) { - return u3m_bail(c3__exit); + return u3_none; } if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } if (n_w == 0) { @@ -516,7 +558,11 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } if (pos_w + n_w > p_octs_w) { u3m_bail(c3__exit); @@ -530,6 +576,8 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) if (pos_w < len_w) { red_w = len_w - pos_w; } + // leading zeros - nothing to read + // else { red_w = 0; } @@ -574,7 +622,8 @@ u3we_bytestream_read_octs(u3_noun cor) } -u3_noun _qe_peek_octs(c3_w n_w, c3_w pos_w, c3_w p_octs_w, c3_y* sea_y, +u3_noun +_qe_peek_octs(c3_w n_w, c3_w pos_w, c3_w p_octs_w, c3_y* sea_y, c3_w len_w) { if (n_w == 0) { @@ -613,7 +662,7 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) c3_w size_w, pos_w; if (c3n == u3r_safe_word(size, &size_w)) { - return u3m_bail(c3__exit); + return u3_none; } if (size_w == 0) { @@ -621,7 +670,7 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) } if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -633,7 +682,11 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } u3_noun hun = u3_nul; @@ -659,7 +712,8 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) return u3kb_flop(hun); } -u3_noun u3we_bytestream_chunk(u3_noun cor) +u3_noun +u3we_bytestream_chunk(u3_noun cor) { u3_atom size; u3_atom pos; @@ -672,7 +726,8 @@ u3_noun u3we_bytestream_chunk(u3_noun cor) return _qe_bytestream_chunk(size, pos, octs); } -u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) +u3_noun +_qe_bytestream_extract(u3_noun sea, u3_noun rac) { u3_atom pos; u3_noun octs; @@ -682,7 +737,7 @@ u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) c3_w pos_w; if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -694,7 +749,11 @@ u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } u3_noun dal = u3_nul; @@ -713,16 +772,17 @@ u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) // XX is u3z necessary here? // does memory get freed on bail? // - u3l_log("sip fail"); + u3l_log("bytestream: sip fail"); u3z(dal); u3z(ext); - u3m_bail(c3__exit); + return u3_none; } + if (c3n == u3r_safe_word(ken, &ken_w)) { - u3l_log("ken fail"); + u3l_log("bytestream: ken fail"); u3z(dal); u3z(ext); - u3m_bail(c3__exit); + return u3_none; } u3z(ext); @@ -751,7 +811,8 @@ u3_noun _qe_bytestream_extract(u3_noun sea, u3_noun rac) return u3nc(u3kb_flop(dal), new_sea); } -u3_noun u3we_bytestream_extract(u3_noun cor) +u3_noun +u3we_bytestream_extract(u3_noun cor) { u3_noun sea; u3_noun rac; @@ -762,7 +823,8 @@ u3_noun u3we_bytestream_extract(u3_noun cor) return _qe_bytestream_extract(sea, rac); } -u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) +u3_noun +_qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) { u3_atom pos; u3_noun octs; @@ -772,7 +834,7 @@ u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) c3_w pos_w; if (c3n == u3r_safe_word(pos, &pos_w)) { - return u3m_bail(c3__exit); + return u3_none; } u3_atom p_octs, q_octs; @@ -784,7 +846,11 @@ u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } u3_noun dal = u3_nul; @@ -803,16 +869,16 @@ u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) // XX is u3z necessary here? // does memory get freed on bail? // - u3l_log("sip fail"); + u3l_log("bytestream: sip fail"); u3z(dal); u3z(ext); - u3m_bail(c3__exit); + return u3_none; } if (c3n == u3r_safe_word(ken, &ken_w)) { - u3l_log("ken fail"); + u3l_log("bytestream: ken fail"); u3z(dal); u3z(ext); - u3m_bail(c3__exit); + return u3_none; } u3z(ext); @@ -846,7 +912,8 @@ u3_noun _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) return u3nc(data, new_sea); } -u3_noun u3we_bytestream_fuse_extract(u3_noun cor) +u3_noun +u3we_bytestream_fuse_extract(u3_noun cor) { u3_noun sea; u3_noun rac; @@ -857,6 +924,8 @@ u3_noun u3we_bytestream_fuse_extract(u3_noun cor) return _qe_bytestream_fuse_extract(sea, rac); } +#define BITS_D (sizeof(c3_d)*8) + u3_noun _qe_bytestream_need_bits(u3_atom n, u3_noun bits) { @@ -867,6 +936,7 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) 6, &bit, 7, &bays, 0); + c3_w n_w, num_w; c3_d bit_d; @@ -890,7 +960,7 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) // Requires indirect atom, drop to Hoon // - if (need_bits_w > sizeof(bit_d)*8) { + if (need_bits_w > BITS_D) { return u3_none; } @@ -903,6 +973,8 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) c3_w pos_w; u3_atom pos; u3_noun octs; + + u3x_mean(bays, 2, &pos, 3, &octs, 0); if (c3n == u3r_safe_word(pos, &pos_w)) { @@ -918,7 +990,11 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) c3_y* sea_y; - _x_octs_buffer(&p_octs, &q_octs, &p_octs_w, &sea_y, &len_w, &lead_w); + if (c3n == _x_octs_buffer(&p_octs, &q_octs, + &p_octs_w, &sea_y, + &len_w, &lead_w)) { + return u3_none; + } if (pos_w + need_bytes_w > p_octs_w) { u3m_bail(c3__exit); @@ -932,7 +1008,7 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) num_w += 8; pos_w++; - u3_assert(num_w <= 64); + u3_assert(num_w <= BITS_D); } u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); @@ -944,7 +1020,8 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) // bit=@ub // =bays // == -u3_noun u3we_bytestream_need_bits(u3_noun cor) +u3_noun +u3we_bytestream_need_bits(u3_noun cor) { u3_atom n; u3_noun bits; @@ -954,6 +1031,7 @@ u3_noun u3we_bytestream_need_bits(u3_noun cor) return _qe_bytestream_need_bits(n, bits); } + u3_noun _qe_bytestream_drop_bits(u3_atom n, u3_noun bits) { @@ -978,6 +1056,10 @@ _qe_bytestream_drop_bits(u3_atom n, u3_noun bits) return u3_none; } + if(n_w == 0) { + return u3k(bits); + } + c3_w dop_w = n_w; if (dop_w > num_w) { @@ -989,7 +1071,8 @@ _qe_bytestream_drop_bits(u3_atom n, u3_noun bits) return u3nt(u3i_word(num_w), u3i_chub(bit_d), u3k(bays)); } -u3_noun u3we_bytestream_drop_bits(u3_noun cor) +u3_noun +u3we_bytestream_drop_bits(u3_noun cor) { u3_atom n; u3_noun bits; @@ -1024,15 +1107,19 @@ _qe_bytestream_peek_bits(u3_atom n, u3_noun bits) return u3_none; } + if (n_w == 0) { + return u3i_word(0); + } + if (n_w > num_w) { u3m_bail(c3__exit); } - if (n_w > 64) { + if (n_w > BITS_D) { return u3_none; } - if (n_w == 64) { + if (n_w == BITS_D) { return u3i_chub(bit_d); } else { @@ -1041,7 +1128,8 @@ _qe_bytestream_peek_bits(u3_atom n, u3_noun bits) return u3i_chub(bit_d & mak_d); } } -u3_noun u3we_bytestream_peek_bits(u3_noun cor) +u3_noun +u3we_bytestream_peek_bits(u3_noun cor) { u3_atom n; u3_noun bits; @@ -1080,13 +1168,13 @@ _qe_bytestream_read_bits(u3_atom n, u3_noun bits) u3m_bail(c3__exit); } - if (n_w > 64) { + if (n_w > BITS_D) { return u3_none; } c3_d bet_d = 0; - if (n_w == 64) { + if (n_w == BITS_D) { bet_d = bit_d; } else { @@ -1094,21 +1182,16 @@ _qe_bytestream_read_bits(u3_atom n, u3_noun bits) bet_d = bit_d & mak_d; } - c3_w dop_w = n_w; - - if (dop_w > num_w) { - dop_w = num_w; - } - - bit_d >>= dop_w; - num_w -= dop_w; + bit_d >>= n_w; + num_w -= n_w; u3_noun new_bits = u3nt(u3i_word(num_w), u3i_chub(bit_d), u3k(bays)); return u3nc(u3i_chub(bet_d), new_bits); } -u3_noun u3we_bytestream_read_bits(u3_noun cor) +u3_noun +u3we_bytestream_read_bits(u3_noun cor) { u3_atom n; u3_noun bits; @@ -1142,6 +1225,10 @@ _qe_bytestream_byte_bits(u3_noun bits) c3_y rem_y = num_w & 0x7; + if (rem_y == 0) { + return u3k(bits); + } + u3_noun new_bits = u3nt(u3i_word(num_w - rem_y), u3i_chub(bit_d >> rem_y), u3k(bays)); @@ -1149,7 +1236,8 @@ _qe_bytestream_byte_bits(u3_noun bits) return new_bits; } -u3_noun u3we_bytestream_byte_bits(u3_noun cor) +u3_noun +u3we_bytestream_byte_bits(u3_noun cor) { u3_noun bits; diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 1b4d5e4ea8..82d7ba6687 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2392,7 +2392,6 @@ static u3j_core _138_hex_bytestream_d[] = {"read-bits", 7, _138_hex_bytestream_read_bits_a, 0, no_hashes }, // {"read-need-bits", 7, _138_hex_bytestream_read_need_bits_a, 0, no_hashes }, {"byte-bits", 7, _138_hex_bytestream_byte_bits_a, 0, no_hashes }, - }; static u3j_core _138_hex_d[] = @@ -2413,7 +2412,7 @@ static u3j_core _138_hex_d[] = { "secp", 6, 0, _140_hex_secp_d, no_hashes }, { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, - { "bytestream", 3, 0, _138_hex_bytestream_d, no_hashes}, + { "bytestream", 31, 0, _138_hex_bytestream_d, no_hashes}, {} };