O tipo de scripts quick'n'dirty que escrevo todos os dias:
#!/usr/bin/perl
#
use strict;
use warnings;
# Scan the input section by section (a section starts at a line beginning
# with "dn:") and report every rdcPosition line whose trailing position
# number occurs more than once within its section.

# Print one finished section: the dn line once, followed by each group of
# rdcPosition lines that share a position seen more than once.
# Prints nothing when no position is repeated in the section.
#   $out       - filehandle to print to
#   $dn        - the "dn:" line that opened the section; undef when
#                rdcPosition lines were read before any dn line
#   $lines_for - hash ref mapping a position to an array ref of the input
#                lines that carried it, in input order
sub print_repeated_positions {
    my ($out, $dn, $lines_for) = @_;

    # Counts are compared numerically. The keys are sorted so the output
    # order is reproducible rather than following Perl's hash ordering;
    # the string tie-break keeps keys such as "1" and "01" stable too.
    my @repeated =
        sort { $a <=> $b or $a cmp $b }
        grep { @{ $lines_for->{$_} } > 1 }
        keys %{$lines_for};
    return unless @repeated;

    print {$out} $dn if defined $dn;
    for my $position (@repeated) {
        print {$out} @{ $lines_for->{$position} };
    }
    return;
}

# Read lines from $in and write the report of repeated positions to $out.
#   $in  - filehandle to read the input from
#   $out - filehandle to write the report to
sub report_duplicate_positions {
    my ($in, $out) = @_;

    my $dn;           # the dn line of the section currently being read
    my %lines_for;    # position => [ lines seen with that position ]

    while (my $line = <$in>) {
        if ($line =~ /^dn:/) {
            # A new dn line ends the previous section: report it, then
            # start collecting afresh.
            print_repeated_positions($out, $dn, \%lines_for);
            $dn        = $line;
            %lines_for = ();
        }
        elsif ($line =~ /^rdcPosition/) {
            # The position is the run of digits at the end of the line;
            # trailing whitespace (e.g. the "\r" of a CRLF file) is
            # tolerated. Lines without such digits carry no position and
            # are ignored instead of being grouped under an undefined key.
            if ($line =~ /(\d+)\s*$/) {
                push @{ $lines_for{$1} }, $line;
            }
        }
    }

    # The last section is not followed by another dn line, so it has to
    # be reported explicitly once the input is exhausted.
    print_repeated_positions($out, $dn, \%lines_for);
    return;
}

# Run as a script: read the files named on the command line (or STDIN when
# there are none), exactly like the <> operator, and print to STDOUT.
report_duplicate_positions(\*ARGV, \*STDOUT) unless caller;
Execute como:
perl script.pl < input_data_file