Improve nodejs asyncID tracking by grcevski · Pull Request #1800 · grafana/beyla · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Improve nodejs asyncID tracking #1800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ debug:
CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build -mod vendor -gcflags "-N -l" -ldflags="-X '$(BUILDINFO_PKG).Version=$(RELEASE_VERSION)' -X '$(BUILDINFO_PKG).Revision=$(RELEASE_REVISION)'" -a -o bin/$(CMD) $(MAIN_GO_FILE)

.PHONY: dev
dev: prereqs docker-generate compile-for-coverage
dev: prereqs generate compile-for-coverage
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for development, I think we should just use the local development clang/llvm for speed purposes, rather than having to use the docker generate.


# Generated binary can provide coverage stats according to https://go.dev/blog/integration-test-coverage
.PHONY: compile-for-coverage compile-cache-for-coverage
Expand Down
184 changes: 184 additions & 0 deletions bpf/common/float64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#pragma once

#include <bpfcore/vmlinux.h>
#include <bpfcore/bpf_helpers.h>

// The following code is a software implementation of floating-point
// subtraction, needed because the eBPF instruction set has no floating-point
// instructions. The code was adapted from the SoftFP library by Fabrice Bellard
// (https://bellard.org/softfp/, licensed under MIT), with most of the handling
// for rounding, denormals, etc. removed. The main purpose of this code is to
// let us read the NodeJS asyncID, which, like any other JavaScript number, is
// stored in memory as a float64.

typedef long int int_fast16_t;

// Lookup table indexed by an 8-bit value: entry i holds the number of leading
// zero bits in i when viewed as a byte (8 for 0, 7 for 1, 6 for 2-3, ...,
// 0 for 128-255). Used by count_leading_zeros32 on the top byte of its input.
static const uint8_t count_leading_zeros_high[] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

// Returns the low 52 bits of `a`, i.e. the fraction field of a float64 bit
// pattern (the implicit leading one is not included).
static __always_inline uint64_t extract_float64_frac(uint64_t a) {
    const uint64_t frac_mask = (((uint64_t)1) << 52) - 1;

    return a & frac_mask;
}

// Returns bits 52..62 of `a`, i.e. the 11-bit biased exponent field of a
// float64 bit pattern (0..0x7FF).
static __always_inline int_fast16_t extract_float64_exp(uint64_t a) {
    // Shift the sign bit out at the top, then bring the exponent down to bit 0.
    const uint64_t without_sign = a << 1;

    return (int_fast16_t)(without_sign >> 53);
}

// Returns bit 63 of `a` (the float64 sign bit) as 0 or 1.
static __always_inline char extract_float64_sign(uint64_t a) {
    const uint64_t sign_bit = a >> 63;

    return (char)sign_bit;
}

// Assembles a float64 bit pattern from a sign (bit 63), an exponent (shifted
// to bit 52) and a significand. The three parts are ADDED, not OR-ed, so a
// significand that has bit 52 set carries into (increments) the exponent field.
static __always_inline uint64_t pack_float64(uint8_t z_sign, int_fast16_t z_exp, uint64_t z_sig) {
    uint64_t packed = z_sig;

    packed += ((uint64_t)z_exp) << 52;
    packed += ((uint64_t)z_sign) << 63;
    return packed;
}

// Shifts `a` right by `count` bits and stores the result in *zPtr. If any
// non-zero bits are shifted out, the least significant bit of the result is
// set ("jammed") so that the loss of precision stays visible. For shifts of
// 64 or more the result is just 1 when `a` is non-zero and 0 otherwise.
static __always_inline void shift64_right_jamming(uint64_t a, int_fast16_t count, uint64_t *zPtr) {
    if (count == 0) {
        // Nothing to shift, nothing lost.
        *zPtr = a;
        return;
    }

    if (!(count < 64)) {
        // Every bit is shifted out; only the sticky bit survives.
        *zPtr = (a != 0);
        return;
    }

    // Bits that fall off the low end, moved to the top so they can be tested.
    const uint64_t dropped = a << ((-count) & 63);
    const uint64_t kept = a >> count;

    *zPtr = kept | (dropped != 0);
}

// Counts the leading zero bits of a 32-bit value (32 for a == 0). The value is
// shifted left in 16- and 8-bit steps until its top byte may be non-zero, and
// the remaining count is taken from the count_leading_zeros_high table.
static __always_inline uint8_t count_leading_zeros32(uint32_t a) {
    uint8_t zeros = 0;

    if ((a >> 16) == 0) {
        // Upper half is empty: skip it.
        zeros += 16;
        a <<= 16;
    }
    if ((a >> 24) == 0) {
        // Top byte is empty: skip it.
        zeros += 8;
        a <<= 8;
    }

    return zeros + count_leading_zeros_high[a >> 24];
}

// Counts the leading zero bits of a 64-bit value (64 for a == 0) by picking
// the most significant non-empty 32-bit half and delegating to
// count_leading_zeros32.
static __always_inline uint8_t count_leading_zeros64(uint64_t a) {
    uint8_t zeros;
    uint32_t half;

    if ((a >> 32) == 0) {
        // High half is all zeros: count them and inspect the low half.
        zeros = 32;
        half = (uint32_t)a;
    } else {
        zeros = 0;
        half = (uint32_t)(a >> 32);
    }

    return zeros + count_leading_zeros32(half);
}

// Normalizes a significand and packs it into a float64 bit pattern.
//
// z_sign: sign bit of the result (0 or 1).
// z_exp:  biased exponent that corresponds to the leading one sitting at
//         bit 62 of z_sig, minus one (pack_float64 adds the parts, so the
//         leading one landing on bit 52 bumps the exponent back up).
// z_sig:  significand with bit 63 clear. If bit 63 were set,
//         count_leading_zeros64 would return 0 and the `- 1` below would wrap
//         the uint8_t shift count to 255.
//
// Returns the packed bit pattern. No rounding is performed: the low 10 bits
// of the normalized significand are simply dropped.
static __always_inline uint64_t normalize_and_pack_float64(uint8_t z_sign,
                                                           int_fast16_t z_exp,
                                                           uint64_t z_sig) {
    // A zero significand has no leading one to normalize. Return a (signed)
    // zero directly; otherwise the exponent adjustment below would subtract
    // the 63-bit shift count and pack a negative exponent into the result.
    if (z_sig == 0) {
        return pack_float64(z_sign, 0, 0);
    }

    // Move the leading one to bit 62 ...
    uint8_t shift_count = count_leading_zeros64(z_sig) - 1;

    z_sig = z_sig << shift_count;
    // ... and then down to bit 52, where the float64 implicit one lives.
    z_sig = z_sig >> 10;

    return pack_float64(z_sign, z_exp - shift_count, z_sig);
}

// Subtracts the magnitudes of two float64 bit patterns: |a| - |b|.
//
// a, b:   raw float64 bit patterns. Only their exponent and fraction fields
//         are read; their sign bits are ignored by this function.
// z_sign: sign to give the result when |a| > |b|. When |b| > |a| the result
//         gets the opposite sign (z_sign ^ 1).
//
// Returns the float64 bit pattern of the difference, or (uint64_t)-1 when the
// operation is invalid: both exponents are 0x7FF, or the operand with the
// larger exponent is a NaN. Equal magnitudes yield +0 (all bits zero). No
// rounding is performed.
static __always_inline uint64_t sub_float64(uint64_t a, uint64_t b, char z_sign) {
    int_fast16_t a_exp;
    int_fast16_t b_exp;
    int_fast16_t z_exp;
    uint64_t a_sig;
    uint64_t b_sig;
    uint64_t z_sig;
    int_fast16_t exp_diff;

    a_sig = extract_float64_frac(a);
    a_exp = extract_float64_exp(a);
    b_sig = extract_float64_frac(b);
    b_exp = extract_float64_exp(b);
    exp_diff = a_exp - b_exp;
    // Make room for 10 extra low-order bits; the implicit leading one, when
    // added below, sits at bit 62 (0x4000000000000000).
    a_sig <<= 10;
    b_sig <<= 10;

    if (0 < exp_diff) {
        goto a_exp_bigger;
    }

    if (exp_diff < 0) {
        goto b_exp_bigger;
    }

    // Exponents are equal from here on.
    if (a_exp == 0x7FF) {
        // Inf - Inf or a NaN operand: report as invalid.
        return -1;
    }

    if (a_exp == 0) {
        a_exp = 1;
        b_exp = 1;
    }

    if (b_sig < a_sig) {
        goto a_bigger;
    }

    if (a_sig < b_sig) {
        goto b_bigger;
    }

    // Identical magnitudes.
    return pack_float64(0, 0, 0);
b_exp_bigger:
    if (b_exp == 0x7FF) {
        if (b_sig) {
            // b is a NaN.
            return -1;
        }
        // b is infinite: the result is infinity with the flipped sign.
        return pack_float64(z_sign ^ 1, 0x7FF, 0);
    }

    if (a_exp == 0) {
        ++exp_diff;
    } else {
        a_sig |= 0x4000000000000000;
    }

    // Align a to b's exponent, keeping a sticky bit for anything shifted out.
    shift64_right_jamming(a_sig, -exp_diff, &a_sig);
    b_sig |= 0x4000000000000000;
b_bigger:
    z_sig = b_sig - a_sig;
    z_exp = b_exp;
    z_sign ^= 1;

    goto normalize_and_pack;
a_exp_bigger:
    if (a_exp == 0x7FF) {
        if (a_sig) {
            // a is a NaN.
            return -1;
        }
        // a is infinite: the result is a itself.
        return a;
    }

    if (b_exp == 0) {
        --exp_diff;
    } else {
        b_sig |= 0x4000000000000000;
    }

    // Align b to a's exponent, keeping a sticky bit for anything shifted out.
    shift64_right_jamming(b_sig, exp_diff, &b_sig);
    a_sig |= 0x4000000000000000;
a_bigger:
    z_sig = a_sig - b_sig;
    z_exp = a_exp;
normalize_and_pack:
    // normalize_and_pack_float64 leaves the leading one on bit 52, which
    // pack_float64 adds into the exponent field; compensate for that here.
    --z_exp;

    return normalize_and_pack_float64(z_sign, z_exp, z_sig);
}
28 changes: 27 additions & 1 deletion bpf/common/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
#include <bpfcore/vmlinux.h>
#include <bpfcore/bpf_helpers.h>

#include <common/float64.h>

#include <logger/bpf_dbg.h>

#include <maps/active_unix_socks.h>
#include <maps/active_nodejs_ids.h>
#include <maps/nodejs_parent_map.h>
Expand All @@ -26,10 +30,32 @@ static __always_inline u64 extra_runtime_id() {
}

// Looks up the parent (trigger) id recorded for `runtime_id` in
// nodejs_parent_map. If there is no exact match, it retries with up to five
// successively smaller ids, each obtained by subtracting 1.0 from the id
// interpreted as a float64 bit pattern. Returns the parent id found, or 0 when
// nothing matches or the decremented id becomes 0 or the sub_float64 error
// value.
static __always_inline u64 parent_runtime_id(u64 runtime_id) {
// NOTE(review): this page is a diff view; the next line looks like the removed
// (pre-change) lookup that the lookup_id-based lines below replace — confirm.
u64 *parent_id = (u64 *)bpf_map_lookup_elem(&nodejs_parent_map, &runtime_id);
u64 lookup_id = runtime_id;
bpf_dbg_printk("parent lookup id %llx", lookup_id);
u64 *parent_id = (u64 *)bpf_map_lookup_elem(&nodejs_parent_map, &lookup_id);
if (parent_id) {
return *parent_id;
}

// When NodeJS uses await, the interpreted JavaScript code, which we cannot
// instrument, will sometimes bump the asyncID. Because of this, we may not
// be able to find the asyncID call chain for context propagation. This code
// looks for close-enough asyncIDs so that we can still find the chain.
for (u32 sub = 0; sub < 5; sub++) {
// lookup_id (as double) - 1 (as double); 0x3ff0000000000000 is the float64
// bit pattern of 1.0.
lookup_id = sub_float64(lookup_id, 0x3ff0000000000000, 0);

// -1 (all bits set) is what sub_float64 returns for invalid operands; 0 means
// the id was decremented all the way down to zero.
if (lookup_id == -1 || lookup_id == 0) {
return 0;
}

bpf_dbg_printk("looking up id %llx", lookup_id);
// This inner parent_id shadows the one declared before the loop.
u64 *parent_id = (u64 *)bpf_map_lookup_elem(&nodejs_parent_map, &lookup_id);
if (parent_id) {
return *parent_id;
}
}

return 0;
}
2 changes: 1 addition & 1 deletion bpf/common/trace_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static __always_inline tp_info_pid_t *find_parent_trace(const pid_connection_inf
}

attempts++;
} while (attempts < 3); // Up to 3 levels of thread nesting allowed
} while (attempts < 5); // Up to 5 levels of thread nesting allowed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this 5 needs to be in sync with the one in the for inside runtime.h, perhaps we should use a constant

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are technically different, one is how many times we go up the child->parent chain (this one in runtime) and the other is how many times we decrement the current async_id until we find one matching.


cp_support_data_t *conn_t_key = bpf_map_lookup_elem(&cp_support_connect_info, p_conn);

Expand Down
3 changes: 2 additions & 1 deletion bpf/generictracer/nodejs.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ int beyla_emit_async_init(struct pt_regs *ctx) {
if (async_id) {
bpf_map_update_elem(&active_nodejs_ids, &id, &async_id, BPF_ANY);
if (trigger_async_id) {
bpf_map_update_elem(&nodejs_parent_map, &async_id, &trigger_async_id, BPF_ANY);
bpf_map_update_elem(
&nodejs_parent_map, &async_id, &trigger_async_id, BPF_NOEXIST);
bpf_dbg_printk(
"async_id = %llx, trigger_async_id = %llx", async_id, trigger_async_id);
} else {
Expand Down
Loading
Loading
0