AWK
(funciona mesmo para linhas de entrada não ordenadas):
awk '
# Print every pairwise combination of stressors that share a gene.
#
# Input : whitespace-separated lines, field 1 = gene, field 2 = stressor.
#         Lines for one gene do not have to be adjacent or sorted.
# Output: "gene stressorA gene stressorB" for each unordered pair, with genes
#         in first-seen order and stressors in input order within each gene.
#
# NOTE: keep apostrophes out of these comments; the whole program is a
# single-quoted shell argument and an apostrophe would end the quoting.

# Collect stressors per gene; skip lines that lack a stressor field.
NF >= 2 {
    if (!($1 in count))
        order[++ngenes] = $1            # remember first-seen gene order
    stressor[$1, ++count[$1]] = $2      # indexed storage, no delimiter to collide with
}

END {
    for (g = 1; g <= ngenes; g++) {     # genes in first-seen order (for-in order is unspecified)
        gene = order[g]
        n = count[gene]
        for (i = 1; i < n; i++)         # all pairs with i < j
            for (j = i + 1; j <= n; j++)
                print gene, stressor[gene, i], gene, stressor[gene, j]
    }
}' file
A saída:
gene1 FishKairomones gene1 Microcystin
gene1 FishKairomones gene1 Calcium
gene1 Microcystin gene1 Calcium
gene2 Cadmium gene2 Microcystis
gene2 Cadmium gene2 FishKairomones
gene2 Cadmium gene2 Phosphorous
gene2 Microcystis gene2 FishKairomones
gene2 Microcystis gene2 Phosphorous
gene2 FishKairomones gene2 Phosphorous
gene3 FishKairomones gene3 Microcystin
gene3 FishKairomones gene3 Phosphorous
gene3 FishKairomones gene3 Cadmium
gene3 Microcystin gene3 Phosphorous
gene3 Microcystin gene3 Cadmium
gene3 Phosphorous gene3 Cadmium