8000 Incremental lookup support · Issue #107 · s-yata/marisa-trie · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
Incremental lookup support #107
Open
@jmr

Description

@jmr

We have some patches for incremental lookup. Here's what the new Trie functions look like:

// Copyright 2025 Google LLC.
// SPDX-License-Identifier: Apache-2.0

// Incrementally looks up the trie from the specified query position and node
// ID stored in the given agent. If failed to find a registered key, the agent
// will keep the last query position and node ID from which the prefix of one
// or more registered keys can be matched.
//
// Example:
//   The trie is built from 2 keys, "batch", "big".
//   // "a" is neither a registered key nor a matched prefix.
//   agent.set_query("a");
//   ASSERT(trie.incremental_lookup(agent) == MARISA_NO_MATCHING_KEY);
//   ASSERT(agent.state().query_pos() == 0);
//
//   // "b" is a matched prefix. The outcome query position will be 1 while
//   // the node ID will be the position of "b" matched on the trie.
//   agent.set_query("b");
//   ASSERT(trie.incremental_lookup(agent) ==
//     MARISA_PREFIX_MATCHING_KEY_FOUND);
//   ASSERT(agent.state().query_pos() == 1);
//
//   // "bi" is also a matched prefix. But "ig" might be compacted together
//   // in the underlying trie. If so, the outcome query position and node ID
//   // are still the values that match "b".
//   agent.set_query("bi");
//   ASSERT(trie.incremental_lookup(agent) ==
//     MARISA_PREFIX_MATCHING_KEY_FOUND);
//   ASSERT(agent.state().query_pos() == 1);
//
//   // "big" is a registered key. The outcome query position and node ID are
//   // the values that match the whole query.
//   agent.set_query("big");
//   ASSERT(trie.incremental_lookup(agent) == MARISA_MATCHING_KEY_FOUND);
//   ASSERT(agent.state().query_pos() == 3);
//
marisa_lookup_result_code incremental_lookup(Agent &agent) const;

// Similar to above one, this function can also support wildcard.
//
// Caller should call this function multiple times until it returns
// MARISA_NO_MATCHING_KEY_FOUND to enumerate all search results of a wildcard
// lookup query.
//
// Note that we can only handle at most one wildcard character per lookup, and
// the wildcard must be at the end of query. If the original query string
// contains multiple wildcards, caller must split it into several queries
// and call this function multiple times. It's also the caller's
// responsibility to maintain all search states of a lookup, and use them for
// following lookup.
//
// Input:
//   agent.state.wildcard: the wildcard character. The valid range is
// [0, 255]. Wildcard lookup will be ignored if its value is invalid.
//   agent.query: the query string.
//   agent.state: the last matched state.
//
// Output:
//   The lookup result of query.
//
// Example:
//   // Trie: {'batch', 'big'}
//   // Query: {'b*g'}
//   agent.set_query("b*");
//   StateData state_data;
//   state_data.Reset();
//   state_data.wildcard = '*';
//   agent.restore_state(state_data);
//
//   // "b*" can match two search states.
//   ASSERT(trie.incremental_lookup_with_wildcard(agent) ==
//     MARISA_PREFIX_MATCHING_KEY_FOUND);
//   states.push_back();
//   agent.store_state(&states.back());
//   ASSERT(trie.incremental_lookup_with_wildcard(agent) ==
//     MARISA_PREFIX_MATCHING_KEY_FOUND);
//   states.push_back();
//   agent.store_state(&states.back());
//   ASSERT(trie.incremental_lookup_with_wildcard(agent) ==
//     MARISA_NO_MATCHING_KEY_FOUND);
//
//   agent.set_query("b*g");
//   for(const auto& state: states) {
//     agent.restore_state(state);
//     // Only one lookup will return MARISA_MATCHING_KEY_FOUND, the other
//     // will return MARISA_NO_MATCHING_KEY_FOUND.
//     result = trie.incremental_lookup_with_wildcard(agent);
//     // handle result ...
//   }
marisa_lookup_result_code incremental_lookup_with_wildcard(
    Agent &agent) const;

// The following three functions can be used to traverse the trie by node id.

// Gets a child node of given node id, this function can be called repeatly to
// get all children.
//
// Input:
//   agent.query.id: The node id. The root node id for marisa trie is 0.
//
// Output:
//   Returning true means a valid child node is found. In that case, the
// node_id field of agent state contains child node id and the key field
// contains the value of corresponding incoming edge.
//   Returns false if no more child node is found.
//
// Example:
//   // Trie: {'batch', 'big'}
//   agent.set_query(0);
//   ASSERT(trie.traverse_by_node_id(agent));
//   ASSERT(agent.state().node_id() == 1);   // Node 'b'
//   ASSERT(strncmp(agent.key().ptr(), "b", 1) == 0);
//   ASSERT(!trie.traverse_by_node_id(agent));
//
//   agent.set_query(1);
//   ASSERT(trie.traverse_by_node_id(agent));
//   ASSERT(agent.state().node_id() == 2);   // Node 'atch'
//   ASSERT(strncmp(agent.key().ptr(), "atch", 4) == 0);
//   ASSERT(trie.traverse_by_node_id(agent));
//   ASSERT(agent.state().node_id() == 3);   // Node 'ig'
//   ASSERT(strncmp(agent.key().ptr(), "ig", 2) == 0);
//   ASSERT(!trie.traverse_by_node_id(agent));
bool traverse_by_node_id(Agent &agent) const;

// Given a node id, retrieves the value of corresponding incoming edge or
// full path.
//
// Input:
//   agent.query.id: The node id. The root node id for marisa trie is 0.
//   agent.query.reverse_lookup_type: Indicates the type of result value.
//     Default value is is Key.
//
// Output:
//   Returns true if succeed. Lookup result is stored in agent.key.
//   Returns false if failed.
//
// Example:
//   // Trie: {'batch', 'big'}
//   agent.set_query(2);
//   agent.set_reverse_lookup_type(REVERSE_LOOKUP_KEY);
//   ASSERT(trie.reverse_lookup_by_node_id(agent));
//   ASSERT(strncmp(agent.key().ptr(), "batch", 5) == 0);
//
//   agent.set_reverse_lookup_type(REVERSE_LOOKUP_EDGE);
//   ASSERT(trie.reverse_lookup_by_node_id(agent));
//   ASSERT(strncmp(agent.key().ptr(), "atch", 4) == 0);
bool reverse_lookup_by_node_id(Agent &agent) const;

// Given a node id, convert it to key id if it's a leaf node.
//
// Input:
//   agent.query.id: The node id.
//
// Output:
//   Returns true if succeed, the key id is stored in agent.key.id.
//   Returns false if failed.
//
// Example:
//   // Trie: {'batch', 'big'}
//   agent.set_query(2);
//   ASSERT(trie.lookup_key_id_by_node_id(agent));
//   ASSERT(agent.key().id() == 0);
bool lookup_key_id_by_node_id(Agent &agent) const;

I can send PRs for this if it looks interesting.

Metadata

Metadata

Assignees

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions

    0