#!/usr/bin/env bash
#
# Compare the records of output_filtered_ns_only.txt against the pileup lines
# collected from all the normals (taken before varfilter).
#
# Files written in the current directory:
#   ns_filter_reformat.txt                      one line per input record
#   ns_filter_reformat.joinField0.txt           same, keyed and sorted
#   all_normal_snps.pileup                      every normal pileup, concatenated
#   all_normal_snps.pileup.unique.joinField0    same, keyed, one line per key
#   ns_filter_reformat.joinField0.txt.joined    records whose key is in the normals
#
# Written to stdout:
#   - records whose key does not occur in the normals
#   - joined records whose new base does not occur in the normal pileup bases
#
# Environment:
#   NORMAL_PILEUP_ROOT  directory searched for pileup files
#                       (default: /projects/rcorbettAlignment/)

# Failures are checked explicitly in main (`|| return`), so -e is not used.
set -uo pipefail

# Print a diagnostic on stderr.
err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Parse the database file to make it easier to process: collapse each
# multi-line record into one line "header detail ns an".
# Lines containing "syn" or "UTR" are dropped before any other rule runs.
# A 5-field line whose first field does not match "an:" starts a new record;
# a 12-field line is kept whole as the detail; field 7 of an "ns:" line and
# field 3 of an "an:" line are kept. Missing parts are printed as "-".
# Arguments: none
# Outputs:   reads records on stdin, writes one line per record to stdout
#######################################
reformat_ns_records() {
  awk '
    /syn/ || /UTR/ { next }
    NF == 5 && $1 !~ /an:/ {
      # Emit the previous record only once one has been started; otherwise an
      # empty record would be printed ahead of the first header.
      if (started) print h, m, s, an
      h = $0; m = "-"; s = "-"; an = "-"; started = 1
      next
    }
    NF == 12   { m = $0; next }
    $1 ~ /ns:/ { s = $7; next }
    $1 ~ /an:/ { an = $3; next }
    # Flush the final record, which no following header would trigger.
    END { if (started) print h, m, s, an }
  '
}

#######################################
# Prefix each reformatted record with the join key "<field2>_<field3>"
# (field 3 formatted as an integer) and sort on that key.
# Arguments: none
# Outputs:   reads stdin, writes keyed and sorted lines to stdout
#######################################
add_snp_join_key() {
  awk '{ printf "%s_%d %s\n", $2, $3, $0 }' | sort -k1,1
}

#######################################
# Concatenate every file below a directory whose path ends in "pileup".
# .snapshot directories are pruned and any path containing "G" is skipped.
# Arguments: $1 - directory to search
# Outputs:   writes the concatenated file contents to stdout
# Returns:   non-zero if find or any cat invocation failed
#######################################
collect_normal_pileups() {
  local root=$1
  # The explicit -exec action matters: with no action, find applies an
  # implicit -print to the whole expression and would also emit the pruned
  # .snapshot directories themselves.
  find "$root" -name .snapshot -prune -o \
    -regex '.*pileup' ! -path '*G*' -exec cat {} +
}

#######################################
# Prefix each pileup line with "<field1>_<field2>" (the join key) and
# "<field1>_<field2>_<field4>", then sort on the key, keeping one line per key.
# Using the chr_pos as the field to check against. Could also use the base,
# but not doing so now.
# Arguments: none
# Outputs:   reads pileup lines on stdin, writes keyed unique lines to stdout
#######################################
add_pileup_join_key() {
  awk '{ printf "%s_%s %s_%s_%s %s\n", $1, $2, $1, $2, $4, $0 }' \
    | sort -u -k1,1
}

#######################################
# From joined lines, strip a leading "<base>>>>" from field 16 and print
# "new_base pileup_bases line" for every line whose lower-cased field 16 does
# not occur inside lower-cased field 11.
# Arguments: none
# Outputs:   reads joined lines on stdin, writes mismatching lines to stdout
#######################################
report_unseen_bases() {
  awk '
    {
      sub(/[ACTGactg]>>>/, "", $16)
      new_base = tolower($16)
      p_bases = tolower($11)
      if (index(p_bases, new_base) == 0) {
        print new_base, p_bases, $0
      }
    }
  '
}

#######################################
# Run the whole comparison in the current directory.
# Globals:   NORMAL_PILEUP_ROOT (read, optional)
# Arguments: none
# Outputs:   intermediate files (see file header); comparison results on stdout
# Returns:   0 on success, non-zero if a required step failed
#######################################
main() {
  local ns_input=output_filtered_ns_only.txt
  local ns_reformat=ns_filter_reformat.txt
  local ns_keyed=ns_filter_reformat.joinField0.txt
  local normals_pileup=all_normal_snps.pileup
  local normals_keyed=all_normal_snps.pileup.unique.joinField0
  local joined=${ns_keyed}.joined
  local normals_root=${NORMAL_PILEUP_ROOT:-/projects/rcorbettAlignment/}

  if [[ ! -r "$ns_input" ]]; then
    err "error: cannot read $ns_input in $PWD"
    return 1
  fi
  if [[ ! -d "$normals_root" ]]; then
    err "error: normals directory not found: $normals_root"
    return 1
  fi

  reformat_ns_records <"$ns_input" >"$ns_reformat" || return
  add_snp_join_key <"$ns_reformat" >"$ns_keyed" || return

  # To make the reference set from all the normals, before varfilter.
  # Best effort: an unreadable pileup is reported but does not stop the run.
  collect_normal_pileups "$normals_root" >"$normals_pileup" \
    || err "warning: some pileup files could not be read; continuing"
  add_pileup_join_key <"$normals_pileup" >"$normals_keyed" || return

  # Compare: records whose key never occurs in the normals.
  join -v 2 "$normals_keyed" "$ns_keyed" || return

  # NOTE(review): the original script read "$joined" without ever creating it.
  # A plain join of the two keyed files is what the field numbers used by
  # report_unseen_bases imply, assuming 10-column pileup lines -- confirm.
  join "$normals_keyed" "$ns_keyed" >"$joined" || return

  # Compare again: key is in the normals, but the new base is not.
  report_unseen_bases <"$joined"
}

main "$@"