awk and sed one-liners

# Print the lines of file2 that do not occur anywhere in file1.
# The first file is recognised by name instead of the usual FNR==NR test:
# when file1 is empty, FNR==NR also holds for every line of file2, so the
# classic form would print nothing at all.
awk 'FILENAME == ARGV[1] { seen[$0]; next } !($0 in seen)' file1 file2

Convert fasta headers to sequential numbers

# Every line starting with ">" is replaced by ">1", ">2", ... in input order;
# all other lines are copied through unchanged.
awk '{ if ($0 ~ /^>/) print ">" ++n; else print }' file.fasta > file.header_mod.fasta

Convert fasta headers to sequential numbers with a prefix ('chromosome' here)

# Header lines (starting with ">") become ">chromosome1", ">chromosome2", ...;
# the file is read on stdin and the result goes to stdout.
awk '/^>/ { $0 = ">chromosome" (++n) } { print }' < file.fasta

Remove the header lines from a VCF file

# Keep only the lines that do not START with "#" (VCF meta and column-header
# lines). The pattern is anchored on purpose: an unanchored /#/ would also
# throw away data records that merely contain a "#" in one of their fields.
awk '!/^#/' variants.VCF > no_header.VCF

# Mean of column N (N=2 here) of the text read from stdin: the column sum
# divided by the number of input lines. Prints nothing when there is no input.
awk -v N=2 '{ total += $N } END { if (NR) print total / NR }'

Fix paired reads that are no longer properly sorted

# Re-pair two FASTQ files (r1.fq, r2.fq) whose records are out of sync.
# Each 4-line record is flattened to "name sequence quality", both streams are
# sorted by read name and joined on it:
#   - names present in both files are written to x1.fq / x2.fq (same order),
#   - names present in only one file are written to orphan.fq.
# The sorted r1 stream is passed to join through a named pipe. The pipe lives
# in a private mktemp directory and is removed afterwards, instead of leaving
# a fifo called "tmp" behind in the working directory.
# LC_ALL=C and -k1,1 make sort order the records by exactly the key, and with
# exactly the byte-wise collation, that join compares on; a whole-line sort in
# a non-C locale can produce an order join rejects as unsorted.
fifo_dir=$(mktemp -d)
mkfifo "${fifo_dir:?}/r1.sorted"
awk 'NR%4==1{n=$1}NR%4==2{s=$1}NR%4==0{print n,s,$1}' r1.fq \
  | LC_ALL=C sort -k1,1 -S 2G > "$fifo_dir/r1.sorted" &
r1_sorter=$!
awk 'NR%4==1{n=$1}NR%4==2{s=$1}NR%4==0{print n,s,$1}' r2.fq \
  | LC_ALL=C sort -k1,1 -S 2G \
  | LC_ALL=C join -a1 -a2 "$fifo_dir/r1.sorted" - \
  | awk 'NF==5{print $1"\n"$2"\n+\n"$3 >"x1.fq";print $1"\n"$4"\n+\n"$5 >"x2.fq"}NF==3{print $1"\n"$2"\n+\n"$3>"orphan.fq"}'
# Reap the background sorter before deleting the pipe it writes to.
wait "$r1_sorter"
rm -rf -- "${fifo_dir:?}"

# Put "<added text> " in front of each of the first ten lines of file,
# rewriting the file in place (-i).
sed -i -e '1,10s/^/<added text> /' file

Add a line at the beginning of a file, in place

# Insert "<added text> " as a new first line of file, in place: the text and a
# newline are placed in front of the existing line 1 (the \n in the
# replacement is written the way GNU sed understands it).
sed -i -e '1s/^/<added text> \n/' file

# Transpose infile: field i of every input line is collected, in input order,
# into output line i (fields joined with FS).
# The output is written with a counting loop over 1..ncols because the
# iteration order of "for (i in array)" is unspecified in awk, which can emit
# the transposed rows in shuffled order.
awk '{
       for (i = 1; i <= NF; i++)
         RtoC[i] = (RtoC[i] != "" ? RtoC[i] FS $i : $i)
       if (NF > ncols) ncols = NF   # widest line seen so far
     }
     END { for (i = 1; i <= ncols; i++) print RtoC[i] }' infile

Merge two files by column and select a subset of columns in the resulting file

Here there are two files, each with two columns; we merge column two of the first file with column one of the second.

# Lay file1 and file2 side by side, line by line, separated by one space
# (pr -m -t), then keep fields 2 and 3 of each merged line.
# The separator is quoted as its own argument: written as -s\ file1 the
# escaped space glues "file1" onto the -s option, so file1 is never read.
pr -m -t -s' ' file1 file2 | awk '{ print $2, $3 }' > out_file.txt

# Print lines 10000 through 100000 (inclusive) of largefile.
# awk reads the file itself (no cat needed) and exits as soon as it is past
# the last wanted line instead of scanning the rest of a large file.
awk 'NR > 100000 { exit } NR >= 10000' largefile

Split a multi-fasta file into individual fasta files, each named after its sequence header

# Split hg18.fa into one file per record. A header line ">NAME" starts a new
# output file called "NAME.fa" (everything after the ">" plus ".fa"); the
# header and the sequence lines that follow it are written to that file.
#  - The awk program is properly terminated and reads hg18.fa directly.
#  - The previous output file is closed when a new header is reached, so the
#    number of simultaneously open files stays at one. As a consequence a
#    header that occurs twice overwrites the file written for its first
#    occurrence.
#  - Lines before the first header are skipped, since there is no file name
#    to write them to yet.
awk '
  /^>/ {
    if (filename != "") close(filename)
    filename = substr($0, 2) ".fa"
  }
  filename != "" { print > filename }
' hg18.fa

# On every line of some.fasta, delete everything from the first "|" to the end
# of the line, editing the file in place; lines without "|" are left as they are.
sed -i -e 's/[|].*$//' some.fasta