@@ -50,7 +50,7 @@ THE SOFTWARE. */
5050#define CHECK_REF_FIX 8
5151
5252#define MROWS_SPLIT 1
53- #define MROWS_MERGE 2
53+ #define MROWS_MERGE 2
5454
5555// Logic of the filters: include or exclude sites which match the filters?
5656#define FLT_INCLUDE 1
@@ -80,11 +80,19 @@ typedef struct
8080}
8181cmpals_t ;
8282
83+ typedef struct // one slot of the sorted output ring buffer (args->lines)
84+ {
85+ bcf1_t * rec ; // buffered record: a bcf_dup() copy owned by the slot, released with bcf_destroy
86+ int pass ; // copy of args->filter_pass taken on insertion; when 0, flush_buffer writes the record directly and skips the merge/rmdup steps
87+ }
88+ line_t ;
89+
8390typedef struct
8491{
8592 char * tseq , * seq ;
8693 int mseq ;
87- bcf1_t * * lines , * * tmp_lines , * * mrows , * mrow_out ;
94+ bcf1_t * * tmp_lines , * * mrows , * mrow_out ;
95+ line_t * lines ;
8896 int ntmp_lines , mtmp_lines , nmrows , mmrows , mrows_first ;
8997 map_t * maps ; // mrow map for each buffered record
9098 char * * als ;
@@ -2165,37 +2173,42 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
21652173 for (i = 0 ; i < n ; i ++ )
21662174 {
21672175 k = rbuf_shift (& args -> rbuf );
2176+ if ( !args -> lines [k ].pass )
2177+ {
2178+ if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ].rec )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2179+ continue ;
2180+ }
21682181 if ( args -> mrows_op == MROWS_MERGE )
21692182 {
2170- if ( mrows_can_flush (args , args -> lines [k ]) )
2183+ if ( mrows_can_flush (args , args -> lines [k ]. rec ) )
21712184 {
21722185 while ( (line = mrows_flush (args )) )
21732186 if ( bcf_write1 (file , args -> out_hdr , line )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
21742187 }
2175- mrows_push (args , & args -> lines [k ]);
2188+ mrows_push (args , & args -> lines [k ]. rec );
21762189 continue ;
21772190 }
21782191 else if ( args -> rmdup )
21792192 {
2180- int line_type = bcf_get_variant_types (args -> lines [k ]);
2181- if ( prev_rid >=0 && prev_rid == args -> lines [k ]-> rid && prev_pos == args -> lines [k ]-> pos )
2193+ int line_type = bcf_get_variant_types (args -> lines [k ]. rec );
2194+ if ( prev_rid >=0 && prev_rid == args -> lines [k ]. rec -> rid && prev_pos == args -> lines [k ]. rec -> pos )
21822195 {
21832196 if ( args -> rmdup & BCF_SR_PAIR_ANY ) { args -> nrmdup ++ ; continue ; } // rmdup by position only
21842197 if ( args -> rmdup & BCF_SR_PAIR_SNPS && line_type & (VCF_SNP |VCF_MNP ) && prev_type & (VCF_SNP |VCF_MNP ) ) { args -> nrmdup ++ ; continue ; }
21852198 if ( args -> rmdup & BCF_SR_PAIR_INDELS && line_type & (VCF_INDEL ) && prev_type & (VCF_INDEL ) ) { args -> nrmdup ++ ; continue ; }
2186- if ( args -> rmdup & BCF_SR_PAIR_EXACT && cmpals_match (args , & args -> cmpals_out , args -> lines [k ]) ) { args -> nrmdup ++ ; continue ; }
2199+ if ( args -> rmdup & BCF_SR_PAIR_EXACT && cmpals_match (args , & args -> cmpals_out , args -> lines [k ]. rec ) ) { args -> nrmdup ++ ; continue ; }
21872200 }
21882201 else
21892202 {
2190- prev_rid = args -> lines [k ]-> rid ;
2191- prev_pos = args -> lines [k ]-> pos ;
2203+ prev_rid = args -> lines [k ]. rec -> rid ;
2204+ prev_pos = args -> lines [k ]. rec -> pos ;
21922205 prev_type = 0 ;
21932206 if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_reset (& args -> cmpals_out );
21942207 }
21952208 prev_type |= line_type ;
2196- if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_add (args ,& args -> cmpals_out , args -> lines [k ]);
2209+ if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_add (args ,& args -> cmpals_out , args -> lines [k ]. rec );
21972210 }
2198- if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ])!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2211+ if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ]. rec )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
21992212 }
22002213 if ( args -> mrows_op == MROWS_MERGE && !args -> rbuf .n )
22012214 {
@@ -2220,7 +2233,7 @@ static void init_data(args_t *args)
22202233 bcf_hdr_printf (args -> out_hdr ,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">" ,args -> old_rec_tag );
22212234
22222235 rbuf_init (& args -> rbuf , 100 );
2223- args -> lines = (bcf1_t * * ) calloc (args -> rbuf .m , sizeof (bcf1_t * ));
2236+ args -> lines = (line_t * ) calloc (args -> rbuf .m , sizeof (* args -> lines ));
22242237 if ( args -> ref_fname )
22252238 {
22262239 args -> fai = fai_load (args -> ref_fname );
@@ -2279,7 +2292,7 @@ static void destroy_data(args_t *args)
22792292 cmpals_destroy (& args -> cmpals_out );
22802293 int i ;
22812294 for (i = 0 ; i < args -> rbuf .m ; i ++ )
2282- if ( args -> lines [i ] ) bcf_destroy1 (args -> lines [i ]);
2295+ if ( args -> lines [i ]. rec ) bcf_destroy1 (args -> lines [i ]. rec );
22832296 free (args -> lines );
22842297 for (i = 0 ; i < args -> mtmp_lines ; i ++ )
22852298 if ( args -> tmp_lines [i ] ) bcf_destroy1 (args -> tmp_lines [i ]);
@@ -2357,18 +2370,19 @@ static void normalize_line(args_t *args, bcf1_t *line)
23572370 }
23582371
23592372 // insert into sorted buffer
2360- rbuf_expand0 (& args -> rbuf ,bcf1_t * ,args -> rbuf .n + 1 ,args -> lines );
2373+ rbuf_expand0 (& args -> rbuf ,line_t ,args -> rbuf .n + 1 ,args -> lines );
23612374 int i ,j ;
23622375 i = j = rbuf_append (& args -> rbuf );
2363- if ( args -> lines [i ] ) bcf_destroy (args -> lines [i ]);
2364- args -> lines [i ] = bcf_dup (line );
2376+ if ( args -> lines [i ].rec ) bcf_destroy (args -> lines [i ].rec );
2377+ args -> lines [i ].rec = bcf_dup (line );
2378+ args -> lines [i ].pass = args -> filter_pass ;
23652379 while ( rbuf_prev (& args -> rbuf ,& i ) )
23662380 {
2367- if ( args -> lines [i ]-> rid == args -> lines [j ]-> rid )
2381+ if ( args -> lines [i ]. rec -> rid == args -> lines [j ]. rec -> rid )
23682382 {
2369- bcf_unpack (args -> lines [i ], BCF_UN_STR );
2370- bcf_unpack (args -> lines [j ], BCF_UN_STR );
2371- if ( args -> cmp_func (& args -> lines [i ], & args -> lines [j ]) > 0 ) SWAP (bcf1_t * , args -> lines [i ], args -> lines [j ]);
2383+ bcf_unpack (args -> lines [i ]. rec , BCF_UN_STR );
2384+ bcf_unpack (args -> lines [j ]. rec , BCF_UN_STR );
2385+ if ( args -> cmp_func (& args -> lines [i ]. rec , & args -> lines [j ]. rec ) > 0 ) SWAP (line_t , args -> lines [i ], args -> lines [j ]);
23722386 }
23732387 j = i ;
23742388 }
@@ -2443,8 +2457,8 @@ static void normalize_vcf(args_t *args)
24432457 if ( done ) break ; // no more lines available
24442458 int i = args -> rbuf .f ;
24452459 int j = rbuf_last (& args -> rbuf );
2446- if ( args -> lines [i ]-> rid != args -> lines [j ]-> rid ) break ;
2447- if ( args -> lines [i ]-> pos != args -> lines [j ]-> pos ) break ;
2460+ if ( args -> lines [i ]. rec -> rid != args -> lines [j ]. rec -> rid ) break ;
2461+ if ( args -> lines [i ]. rec -> pos != args -> lines [j ]. rec -> pos ) break ;
24482462 }
24492463 if ( done ) break ;
24502464
@@ -2454,16 +2468,16 @@ static void normalize_vcf(args_t *args)
24542468 int i , j = 0 ;
24552469 for (i = -1 ; rbuf_next (& args -> rbuf ,& i ); )
24562470 {
2457- if ( args -> lines [ifst ]-> rid != args -> lines [ilast ]-> rid )
2471+ if ( args -> lines [ifst ]. rec -> rid != args -> lines [ilast ]. rec -> rid )
24582472 {
24592473 // there are two chromosomes in the buffer, count how many are on the first chromosome
2460- if ( args -> lines [ifst ]-> rid != args -> lines [i ]-> rid ) break ;
2474+ if ( args -> lines [ifst ]. rec -> rid != args -> lines [i ]. rec -> rid ) break ;
24612475 j ++ ;
24622476 continue ;
24632477 }
24642478 // there is just one chromosome, flush only lines that are unlikely to change order on
24652479 // realigning (the buf_win constant)
2466- if ( args -> lines [ilast ]-> pos - args -> lines [i ]-> pos < args -> buf_win ) break ;
2480+ if ( args -> lines [ilast ]. rec -> pos - args -> lines [i ]. rec -> pos < args -> buf_win ) break ;
24672481 j ++ ;
24682482 }
24692483 if ( j > 0 ) flush_buffer (args , args -> out , j );
0 commit comments