8000 query padding by ekg · Pull Request #339 · waveygang/wfmash · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

query padding #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/align/include/align_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct Parameters {
bool disable_chain_patching; //Disable alignment patching at chain boundaries
bool multithread_fasta_input; //Multithreaded fasta input
uint64_t target_padding; //Additional padding around target sequence
uint64_t query_padding; //Additional padding around query sequence

#ifdef WFA_PNG_TSV_TIMING
// plotting
Expand Down
29 changes: 25 additions & 4 deletions src/align/include/computeAlignments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ struct seq_record_t {
* @param[in] mappingRecordLine
* @param[out] currentRecord
*/
inline static void parseMashmapRow(const std::string &mappingRecordLine, MappingBoundaryRow &currentRecord, const uint64_t target_padding) {
inline static void parseMashmapRow(const std::string &mappingRecordLine, MappingBoundaryRow &currentRecord, const uint64_t target_padding, const uint64_t query_padding = 0) {
auto tokens = tokenize_view(mappingRecordLine);

// Check if the number of tokens is at least 13
Expand Down Expand Up @@ -241,11 +241,14 @@ struct seq_record_t {
currentRecord.chain_length = chain_length;
currentRecord.chain_pos = chain_pos;

// Apply target padding while ensuring we don't go below 0 or above reference length
// Parse position values
uint64_t rStartPos = std::stoi(std::string(tokens[7]));
uint64_t rEndPos = std::stoi(std::string(tokens[8]));
uint64_t qStartPos = currentRecord.qStartPos;
uint64_t qEndPos = currentRecord.qEndPos;
const uint64_t query_len = std::stoull(std::string(tokens[1])); // Query sequence length

// Always apply target padding
// Apply target padding while ensuring we don't go below 0 or above reference length
if (target_padding > 0) {
if (rStartPos >= target_padding) {
rStartPos -= target_padding;
Expand All @@ -258,6 +261,24 @@ struct seq_record_t {
rEndPos = ref_len;
}
}

// Apply query padding while ensurin 8000 g we don't go below 0 or above query length
if (query_padding > 0) {
if (qStartPos >= query_padding) {
qStartPos -= query_padding;
} else {
qStartPos = 0;
}
if (qEndPos + query_padding <= query_len) {
qEndPos += query_padding;
} else {
qEndPos = query_len;
}

// Update the query positions
currentRecord.qStartPos = qStartPos;
currentRecord.qEndPos = qEndPos;
}

// Validate coordinates against reference length
if (rStartPos >= ref_len || rEndPos > ref_len) {
Expand Down Expand Up @@ -403,7 +424,7 @@ void processMappingRecord(
try {
// Parse the mapping record
MappingBoundaryRow currentRecord;
parseMashmapRow(record, currentRecord, param.target_padding);
parseMashmapRow(record, currentRecord, param.target_padding, param.query_padding);

// Create sequence record
std::unique_ptr<seq_record_t> seq_rec(
Expand Down
4 changes: 2 additions & 2 deletions src/common/progress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ class ProgressMeter {
start_time = std::chrono::high_resolution_clock::now();
last_file_update = start_time;

// Check if stderr is a TTY *AND* stdout is not redirected to a file
use_progress_bar = isatty(fileno(stderr)) && isatty(fileno(stdout));
// Check if stderr is a TTY
use_progress_bar = isatty(fileno(stderr));

// For file output, print initial banner with 0% progress
if (!use_progress_bar) {
Expand Down
24 changes: 19 additions & 5 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ void parse_args(int argc,

args::Group alignment_opts(options_group, "Alignment:");
args::ValueFlag<std::string> input_mapping(alignment_opts, "FILE", "input PAF file for alignment", {'i', "align-paf"});
args::ValueFlag<std::string> target_padding(alignment_opts, "INT", "padding around target sequence [100]", {'E', "target-padding"});
args::ValueFlag<std::string> target_padding(alignment_opts, "INT", "padding around target sequence [500]", {'E', "target-padding"});
args::ValueFlag<std::string> query_padding(alignment_opts, "INT", "padding around query sequence [500]", {'U', "query-padding"});
args::ValueFlag<std::string&g 8000 t; wfa_params(alignment_opts, "vals",
"scoring: mismatch, gap1(o,e), gap2(o,e) [6,6,3,24,1]", {'g', "wfa-params"});
args::Flag disable_chain_patching(alignment_opts, "", "disable alignment patching at chain boundaries", {"disable-chain-patching"});
Expand Down Expand Up @@ -293,9 +294,9 @@ void parse_args(int argc,
align_parameters.wfa_patching_gap_opening_score2 = params[3];
align_parameters.wfa_patching_gap_extension_score2 = params[4];
} else {
align_parameters.wfa_patching_mismatch_score = 6;
align_parameters.wfa_patching_gap_opening_score1 = 6;
align_parameters.wfa_patching_gap_extension_score1 = 3;
align_parameters.wfa_patching_mismatch_score = 5;
align_parameters.wfa_patching_gap_opening_score1 = 8;
align_parameters.wfa_patching_gap_extension_score1 = 2;
align_parameters.wfa_patching_gap_opening_score2 = 24;
align_parameters.wfa_patching_gap_extension_score2 = 1;
}
Expand Down Expand Up @@ -533,7 +534,20 @@ void parse_args(int argc,
}
align_parameters.target_padding = p;
} else {
align_parameters.target_padding = 100;
// Default to half segment length, capped at 5000
align_parameters.target_padding = std::min(map_parameters.segLength / 2, (int64_t)5000);
}

if (query_padding) {
const int64_t p = handy_parameter(args::get(query_padding));
if (p < 0) {
std::cerr << "[wfmash] ERROR: query padding must be >= 0" << std::endl;
exit(1);
}
align_parameters.query_padding = p;
} else {
// Default to half segment length, capped at 5000
align_parameters.query_padding = std::min(map_parameters.segLength / 2, (int64_t)5000);
}

if (thread_count) {
Expand Down
0