Tentei resolver o problema com este programa. Não é a melhor solução (ele analisa o arquivo duas vezes e tem algum código duplicado), mas sinta-se à vontade para adaptá-lo às suas necessidades. Acho que ele faz o trabalho.
$ cat script.pl
use warnings;
use strict;
use Text::CSV_XS;
## Pair every first-column description with the identifier it starts with.
##
## Input (STDIN): two-column CSV.  The first column is a free-text
## description that begins with an identifier; the second column is an
## identifier that may sit on the wrong row or be missing altogether.
## Output (STDOUT): one quoted CSV row per description, with the matching
## identifier in the second column when it occurs anywhere in the input's
## second column.
##
## The input is handled line by line, so quoted fields that contain
## newlines are not supported.

## %col1 counts every first-column value, %col2 every second-column value
## that has not been printed yet.
my (%col1, %col2);

## empty_is_undef: an empty field is returned as undef, which is what the
## defined() checks below rely on.
my $csv = Text::CSV_XS->new(
    { empty_is_undef => 1 }
) or die "Error: " . Text::CSV_XS->error_diag();

chomp( my @data = <STDIN> );

## First pass: read file and save first column in %col1 hash and second
## column in %col2 hash.
foreach my $line (@data) {
    die "Error in parse of CSV file\n" unless $csv->parse($line);
    my ($first, $second) = $csv->fields();
    $col1{$first}++  if defined $first;
    $col2{$second}++ if defined $second;
}

## Second pass: print the rows in input order.
LINE:
foreach my $line (@data) {
    die "Error in parse of CSV file\n" unless $csv->parse($line);
    my ($first, $second) = $csv->fields();

    ## Discard line if both columns are undefined.
    next LINE if !defined $first && !defined $second;

    ## 1.- Undefined first column: save second column in hash.
    if ( !defined $first ) {
        $col2{$second} = 1;
        next LINE;
    }

    ## 2.- Both columns are defined and the line is already sorted (the
    ## description contains the identifier): print it as is.
    if ( defined $second && index( $first, $second ) > -1 ) {
        print quote($first), ",", quote($second), "\n";
        delete $col2{$second};
        next LINE;
    }

    ## 3.- Line unsorted or second column undefined: take the leading
    ## non-blank token of the description as its identifier and print it
    ## only when that identifier was seen in the second column.
    ## List assignment is used instead of "my $key = $1 if ...", whose
    ## behaviour is undefined.  \S* can match the empty string, so the
    ## match always succeeds and $key is always defined.
    my ($key) = $first =~ /^(\S*)/;
    print quote($first), ",", ( exists $col2{$key} ? quote($key) : "" ), "\n";
    delete $col2{$key};

    next LINE unless defined $second;

    ## Here, the second unsorted column, search its equivalent in first
    ## column.  If not found print it now, else it will be printed later
    ## together with the row it belongs to.
    for my $str ( keys %col1 ) {
        next LINE if index( $str, $second ) > -1;
    }
    print ",", quote($second), "\n";
}

## quote($str)
##
## Return $str as a double-quoted CSV field.  Embedded double quotes are
## doubled, as CSV requires, and the whole value is wrapped even when it
## contains newlines.  An undefined argument yields an empty quoted
## field ("").
sub quote {
    my ($str) = @_;
    $str = '' unless defined $str;

    # A literal quote inside a quoted CSV field is written as two quotes.
    $str =~ s/"/""/g;

    return qq{"$str"};
}
Seu arquivo de dados:
"XYZ-ZTE-43255 serverB618 agreed","XYZ-ZTE-44432"
"XYZ-ZTE-52775 serverB110 agreed",
"XYZ-ZTE-79213 - serverB688 agreed",
"XYZ-ZTE-77323 serverB617 agreed",
"XYZ-ZTE-81422 - serverB609 agreed","XYZ-ZTE-77323"
"XYZ-ZTE-32785 - serverA626 agreed","XYZ-ZTE-52775"
"XYZ-ZTE-43235 - serverA605 disagreed (asdfjlasdj yxvv il lkyeas sadfa)","XYZ-ZTE-43235"
"XYZ-ZTE-11591 serverB144 agreed",
Os resultados:
$ perl script.pl <yourdatafile
"XYZ-ZTE-43255 serverB618 agreed","XYZ-ZTE-43255"
"XYZ-ZTE-52775 serverB110 agreed","XYZ-ZTE-52775"
"XYZ-ZTE-79213 - serverB688 agreed",
"XYZ-ZTE-77323 serverB617 agreed","XYZ-ZTE-77323"
"XYZ-ZTE-81422 - serverB609 agreed",
"XYZ-ZTE-32785 - serverA626 agreed",
"XYZ-ZTE-43235 - serverA605 disagreed (asdfjlasdj yxvv il lkyeas sadfa)","XYZ-ZTE-43235"
"XYZ-ZTE-11591 serverB144 agreed","XYZ-ZTE-11591"