#!/usr/bin/perl

use strict;
use Getopt::Long;

MAIN : {

    my $read1_bam;
    my $read2_bam;
    my $qual_limit = 0;
    my $single_flag;

    GetOptions('file1=s' => \$read1_bam,
	       'file2=s' => \$read2_bam,
	       'qual=i' => \$qual_limit,
	       'single' => \$single_flag);
    
    my $error1 = "\nUsage: ./two_read_bam_combiner.pl --file1 read1_bam_file --file2 read2_bam_file\n\n";
    my $error2 = "Options:\n";
    my $error3 = "--qual N [Reads pairs with mapq less than N will be filtered out, default is 0]\n";
    my $error4 = "--single [Will also print single ended reads with mapq greater than quality limit]\n";

    if ((not defined $read1_bam) ||
	(not defined $read2_bam)) {
	die ($error1,$error2,$error3,$error4,"\n");
    }

    if ($single_flag) {
	print STDERR "Will print read pairs and single reads with mapping quality greater than $qual_limit\n";
    } else {
	print STDERR "Will print read pairs with mapping quality greater than $qual_limit\n";
    }

    open(FILE1,"samtools view -h $read1_bam |");
    open(FILE2,"samtools view $read2_bam |");

    my $line1 = <FILE1>;
    my $line2 = <FILE2>;

    until ($line1 !~ m/^\@/) {
	print $line1;
	$line1 = <FILE1>;
    }

    my $counter = 0;
    my $new_counter = 0;

    my $next_line1 = <FILE1>;
    my $next_line2 = <FILE2>;

    while () {
	$counter++;
	if ($counter == ($new_counter + 100000)) {
	    print STDERR $counter . "\n";
	    $new_counter = $counter;
	}
	chomp $line1;
	chomp $line2;

	chomp $next_line1;
	chomp $next_line2;
	
	my ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/,$line1);
	my ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/,$line2);

	my ($next_id1, $next_flag1, $next_chr_from1, $next_loc_from1, $next_mapq1, $next_cigar1, $next_d1_1, $next_d2_1, $next_d3_1, $next_read1, $next_read_qual1, @next_rest1) = split(/\t/,$next_line1);
        my ($next_id2, $next_flag2, $next_chr_from2, $next_loc_from2, $next_mapq2, $next_cigar2, $next_d1_2, $next_d2_2, $next_d3_2, $next_read2, $next_read_qual2, @next_rest2) = split(/\t/,$next_line2);
	
	my @bin1 = split(//,reverse(dec2bin($flag1)));
	my @bin2 = split(//,reverse(dec2bin($flag2)));

	my @next_bin1 = split(//,reverse(dec2bin($next_flag1)));
	my @next_bin2 = split(//,reverse(dec2bin($next_flag2)));

	my @array1;
	push(@array1,$line1);

	until ($id1 ne $next_id1) {
	    push(@array1,$next_line1);
	    $line1 = $next_line1;
	    $next_line1 = <FILE1>;
	    chomp $next_line1;
	    ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/,$line1);
	    ($next_id1, $next_flag1, $next_chr_from1, $next_loc_from1, $next_mapq1, $next_cigar1, $next_d1_1, $next_d2_1, $next_d3_1, $next_read1, $next_read_qual1, @next_rest1) = split(/\t/,$next_line1);
	}

	my @array2;
	push(@array2,$line2);

	until ($id2 ne $next_id2) {
            push(@array2,$next_line2);
            $line2 = $next_line2;
            $next_line2 = <FILE2>;
            chomp $next_line2;
	    ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/,$line2);
	    ($next_id2, $next_flag2, $next_chr_from2, $next_loc_from2, $next_mapq2, $next_cigar2, $next_d1_2, $next_d2_2, $next_d3_2, $next_read2, $next_read_qual2, @next_rest2) = split(/\t/,$next_line2);
        }

	my $primary_line1;
	
	if (scalar(@array1) > 1) {
	    foreach my $read (@array1) {
		my ($read_id1, $f1, $chr1, $loc1, $m1, $c1, $d1_r, $d2_r, $d3_r, $seq1, $q1, @r1) = split(/\t/,$read);
		my @read_bin1 = split(//,reverse(dec2bin($f1)));
		if ($read_bin1[8] == 0) {
		    $primary_line1 = $read;
		}
	    }
	} else {
	    $primary_line1 = $array1[0];
	}

	my $primary_line2;
	
	if (scalar(@array2) > 1) {
            foreach my $read (@array2) {
		my ($read_id1, $f1, $chr1, $loc1, $m1, $c1, $d1_r, $d2_r, $d3_r, $seq1, $q1, @r1) = split(/\t/,$read);
		my @read_bin1 = split(//,reverse(dec2bin($f1)));
		if ($read_bin1[8] == 0) {
                    $primary_line2 = $read;
                }
            }
        } else {
            $primary_line2 = $array2[0];
        }

	my ($p_id1, $p_flag1, $p_chr_from1, $p_loc_from1, $p_mapq1, $p_cigar1, $p_d1_1, $p_d2_1, $p_d3_1, $p_read1, $p_read_qual1, @p_rest1) = split(/\t/,$primary_line1);
	my ($p_id2, $p_flag2, $p_chr_from2, $p_loc_from2, $p_mapq2, $p_cigar2, $p_d1_2, $p_d2_2, $p_d3_2, $p_read2, $p_read_qual2, @p_rest2) = split(/\t/,$primary_line2);

	my @p_bin1 = split(//,reverse(dec2bin($p_flag1)));
	my @p_bin2 = split(//,reverse(dec2bin($p_flag2)));

	foreach my $read (@array1) {

	    my ($read_id1, $f1, $chr1, $loc1, $m1, $c1, $d1_r, $d2_r, $d3_r, $seq1, $q1, @r1) = split(/\t/,$read);
	    my @read_bin1 = split(//,reverse(dec2bin($f1)));

	    my $trouble = 0;
	    if (($read_bin1[2] == 1) && ($m1 >= 10)) {
		$trouble = 1;
	    }
	    if (($p_bin2[2] == 1) && ($p_mapq2 >= 10)) {
		$trouble = 1;
	    }
	    
	    my $qual_test1 = 0;
	    my $qual_test2 = 0;
	    if ($qual_limit == 0) {
		if ($m1 == 0) {
		    $qual_test1 = 1;
		}
		if ($p_mapq2 == 0) {
		    $qual_test2 = 1;
		}
	    } else {
		if ($m1 < $qual_limit) {
		    $qual_test1 = 1;
		}
		if ($p_mapq2 < $qual_limit) {
		    $qual_test2 = 1;
		}
	    }
	    
	    my $proper_pair1;
	    my $proper_pair2;
	    my $dist1;
	    my $dist2;
	    
	    if (($read_bin1[2] == 0) && ($p_bin2[2] == 0)) {
		$proper_pair1 = 1;
		$proper_pair2 = 1;
		if ($chr1 eq $p_chr_from2) {
		    my $read_length2 = get_read_length_cigar($p_cigar2);
		    my $read_length1 = get_read_length_cigar($c1);
		    my $dist = abs($loc1 - $p_loc_from2);		    
		    if ($loc1 > $p_loc_from2) {
			$dist1 = -1*($dist + $read_length1);
			$dist2 = ($dist + $read_length1);
		    } else {
			$dist1 = ($dist + $read_length2);
			$dist2 = -1*($dist + $read_length2);
		    }
		} else {
		    $dist1 = 0;
		    $dist2 = 0;
		}
	    } else {
		$proper_pair1 = 0;
		$proper_pair2 = 0;
		$dist1 = 0;
		$dist2 = 0;
	    }
	    
	    my $out_chr1;
	    my $out_chr2;
	    if ($chr1 eq $p_chr_from2) {
		if (($chr1 ne "*") &&
		    ($p_chr_from2 ne "*")) {
		    $out_chr1 = "=";
		    $out_chr2 = "=";
		} else {
		    $out_chr1 = "*";
		    $out_chr2 = "*";
		}
	    } else {
		$out_chr1 = $p_chr_from2;
		$out_chr2 = $chr1;
	    }
	    
	    my $new_bin1 = join("","000000000000000000000",$read_bin1[10],$read_bin1[9],$read_bin1[8],"0","1",$p_bin2[4],$read_bin1[4],$p_bin2[2],$read_bin1[2],$proper_pair1,"1");
	    my $new_bin2 = join("","000000000000000000000",$p_bin2[10],$p_bin2[9],$p_bin2[8],"1","0",$read_bin1[4],$p_bin2[4],$read_bin1[2],$p_bin2[2],$proper_pair2,"1");

	    my $new_flag1 = bin2dec($new_bin1);
	    my $new_flag2 = bin2dec($new_bin2);

	    unless ($trouble > 0) {

		if (($qual_test1 == 0) &&
		    ($qual_test2 == 0)) {
		    
		    print(join("\t",$read_id1,$new_flag1,$chr1,$loc1,$m1,$c1,$out_chr1,$p_loc_from2,$dist1,$seq1,$q1,@r1) . "\n");
#		    print(join("\t",$p_id2,$new_flag2,$p_chr_from2,$p_loc_from2,$p_mapq2,$p_cigar2,$out_chr2,$loc1,$dist2,$p_read2,$p_read_qual2,@p_rest2) . "\n");

		} elsif (($qual_test1 == 0) && ($qual_test2 > 0) && ($single_flag)) {

		    print $read . "\n";
		
		} elsif (($qual_test1 > 0) && ($qual_test2 == 0) && ($single_flag)) {
		
#		    print $primary_line2 . "\n";

		}
	    }
	}

	foreach my $read (@array2) {

	    my ($read_id2, $f2, $chr2, $loc2, $m2, $c2, $d1_r, $d2_r, $d3_r, $seq2, $q2, @r2) = split(/\t/,$read);
            my @read_bin2 = split(//,reverse(dec2bin($f2)));

            my $trouble = 0;
            if (($p_bin1[2] == 1) && ($p_mapq1 >= 10)) {
                $trouble = 1;
            }
            if (($read_bin2[2] == 1) && ($m2 >= 10)) {
                $trouble = 1;
            }

	    my $qual_test1 = 0;
            my $qual_test2 = 0;
            if ($qual_limit == 0) {
                if ($p_mapq1 == 0) {
                    $qual_test1 = 1;
                }
                if ($m2 == 0) {
                    $qual_test2 = 1;
                }
            } else {
                if ($p_mapq1 < $qual_limit) {
                    $qual_test1 = 1;
                }
                if ($m2 < $qual_limit) {
                    $qual_test2 = 1;
                }
            }

	    my $proper_pair1;
            my $proper_pair2;
            my $dist1;
            my $dist2;

            if (($p_bin1[2] == 0) && ($read_bin2[2] == 0)) {
                $proper_pair1 = 1;
                $proper_pair2 = 1;
                if ($p_chr_from1 eq $chr2) {
		    my $read_length1 = get_read_length_cigar($p_cigar1);
		    my $read_length2 = get_read_length_cigar($c2);
                    my $dist = abs($p_loc_from1 - $loc2);
                    if ($loc2 < $p_loc_from1) {
                        $dist1 = -1*($dist + $read_length1);
                        $dist2 = ($dist + $read_length1);
                    } else {
                        $dist1 = ($dist + $read_length2);
                        $dist2 = -1*($dist + $read_length2);
                    }
                } else {
                    $dist1 = 0;
                    $dist2 = 0;
                }
            } else {
                $proper_pair1 = 0;
                $proper_pair2 = 0;
                $dist1 = 0;
                $dist2 = 0;
            }

	    my $out_chr1;
            my $out_chr2;
            if ($p_chr_from1 eq $chr2) {
                if (($p_chr_from1 ne "*") &&
                    ($chr2 ne "*")) {
                    $out_chr1 = "=";
                    $out_chr2 = "=";
                } else {
                    $out_chr1 = "*";
                    $out_chr2 = "*";
                }
            } else {
                $out_chr1 = $chr2;
                $out_chr2 = $p_chr_from1;
            }

            my $new_bin1 = join("","000000000000000000000",$p_bin1[10],$p_bin1[9],$p_bin1[8],"0","1",$read_bin2[4],$p_bin1[4],$read_bin2[2],$p_bin1[2],$proper_pair1,"1");
            my $new_bin2 = join("","000000000000000000000",$read_bin2[10],$read_bin2[9],$read_bin2[8],"1","0",$p_bin1[4],$read_bin2[4],$p_bin1[2],$read_bin2[2],$proper_pair2,"1");

	    my $new_flag1 = bin2dec($new_bin1);
            my $new_flag2 = bin2dec($new_bin2);

            unless ($trouble > 0) {

                if (($qual_test1 == 0) &&
                    ($qual_test2 == 0)) {

#                    print(join("\t",$p_id1,$new_flag1,$p_chr_from1,$p_loc_from1,$p_mapq1,$p_cigar1,$out_chr1,$loc2,$dist1,$p_read1,$p_read_qual1,@p_rest1) . "\n");
                    print(join("\t",$read_id2,$new_flag2,$chr2,$loc2,$m2,$c2,$out_chr2,$p_loc_from1,$dist2,$seq2,$q2,@r2) . "\n");

                } elsif (($qual_test1 == 0) && ($qual_test2 > 0) && ($single_flag)) {

#                    print $primary_line1 . "\n";
		    
                } elsif (($qual_test1 > 0) && ($qual_test2 == 0) && ($single_flag)) {

                    print $read . "\n";

                }
            }

	}

	$line1 = $next_line1;
	$line2 = $next_line2;

	if ((not defined $line1) ||
	    (not defined $line2)) {
	    last;
	}

	$next_line1 = <FILE1>;
	$next_line2 = <FILE2>;

    }

}

sub frag_search {
    
    my $chr_from = $_[0];
    my $loc_from = $_[1];
    my $bin = $_[2];
    my $cigar = $_[3];
    my $for_frag_hash = $_[4];
    my $rev_frag_hash = $_[5];
    my $for_frag_lookup = $_[6];
    my $rev_frag_lookup = $_[7];

    if ($bin == 0) {
	my $i = 0;
	my $j = scalar(@{$for_frag_hash->{$chr_from}}) - 1;
	my $mid = int(($i + $j)/2);
	while (($j - $i) > 1) {
	    if ($loc_from > $for_frag_hash->{$chr_from}->[$mid]) {
		$i = $mid;
		$mid = int(($i + $j)/2);
	    } else {
		$j = $mid;
		$mid = int(($i + $j)/2);
	    }
	}
	my $start;
	if ($loc_from < $for_frag_hash->{$chr_from}->[$j]) {
	    $start = $for_frag_hash->{$chr_from}->[$i];
	} else {
	    $start = $for_frag_hash->{$chr_from}->[$j];	    
	}
	my $end = $for_frag_lookup->{$chr_from}->{$start};
	my $out_frag = $chr_from . "," . $start . "," . $end;
	return($out_frag);
    } else {
	my $i = 0;
        my $j = scalar(@{$rev_frag_hash->{$chr_from}}) - 1;
        my $mid = int(($i + $j)/2);
        while (($j - $i) > 1) {
            if (($loc_from + get_read_length_cigar($cigar)) > $rev_frag_hash->{$chr_from}->[$mid]) {
                $i = $mid;
                $mid = int(($i + $j)/2);
            } else {
                $j = $mid;
                $mid = int(($i + $j)/2);
            }
        }
	my $end;
        if (($loc_from + get_read_length_cigar($cigar))< $rev_frag_hash->{$chr_from}->[$i]) {
            $end = $rev_frag_hash->{$chr_from}->[$i];
        } else {
            $end = $rev_frag_hash->{$chr_from}->[$j];
        }
        my $start = $rev_frag_lookup->{$chr_from}->{$end};
        my $out_frag = $chr_from . "," . $start . "," . $end;
        return($out_frag);
    }

}

sub get_read_length_cigar {

    my $cigar = $_[0];
    my @cig_array = ($cigar =~ m/\d+\D+/g);
    my $sum = 0;
    foreach my $cig (@cig_array) {
	if ($cig =~ m/[MDN]/) {
	    chop $cig;
	    $sum += $cig;
	}
    }
    return($sum);

}

sub id_test {

    my $id1 = $_[0];
    my $id2 = $_[1];

    $id1 =~ s/\/[12]$//g;
    $id2 =~ s/\/[12]$//g;

    if ($id1 ne $id2) {
	die ("The IDs don't match up\n");
    } else {
	return($id1,$id2);
    }

}

sub bin_search {

    my $chr = $_[0];
    my $loc = $_[1];
    my $hash = $_[2];
    my $lookup = $_[3];

    if ((not defined $hash->{$chr}) ||
	(not defined $lookup->{$chr})) {
	return(1);
    } else {
	my $i = 0;
	my $j = scalar(@{$hash->{$chr}}) - 1;
	my $mid = int(($i + $j)/2);
	while (($j - $i) > 1) {
	    if ($loc < $hash->{$chr}->[$mid]) {
		$j = $mid;
		$mid = int(($i + $j)/2);
	    } else {
		$i = $mid;
		$mid = int(($i + $j)/2);
	    }
	}
	if (($loc < $hash->{$chr}->[$i]) ||
	    ($loc > $lookup->{$chr}->{$hash->{$chr}->[$i]})) {
	    return(1);
	} else {
	    return(0);
	}

    }

}

sub dec2bin {

    my $str = unpack("B32", pack("N", shift));
    return $str;

}

sub bin2dec {
    return unpack("N", pack("B32", substr("0" x 32 . shift, -32)));
}
    
