Eu escrevi um script localizador de duplicatas em PowerShell usando o assembly .NET do WinSCP.
O script primeiro itera uma árvore de diretórios remotos e procura arquivos com o mesmo tamanho. Quando encontra algum, por padrão, faz o download dos arquivos e compara-os localmente.
Se você sabe que o servidor suporta uma extensão de protocolo para calcular somas de verificação, pode melhorar a eficiência do script adicionando a opção -remoteChecksumAlg,
que faz o script solicitar a soma de verificação ao servidor, poupando o download do arquivo.
powershell.exe -File find_duplicates.ps1 -sessionUrl ftp://user:password@example.com/ -remotePath /path
O script é:
# Script parameters:
#   -sessionUrl         WinSCP session URL of the server to scan. The default
#                       below is only a placeholder template; replace the
#                       user, password, host key fingerprint and host name.
#   -remotePath         (mandatory) Remote directory where the scan starts.
#   -remoteChecksumAlg  Optional checksum algorithm name. When set, checksums
#                       are requested from the server instead of downloading
#                       the files and hashing them locally.
param (
    # Use Generate URL function to obtain a value for -sessionUrl parameter.
    $sessionUrl = "sftp://user:mypassword;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/",
    [Parameter(Mandatory)]
    $remotePath,
    $remoteChecksumAlg = $Null
)
# Returns the checksum of the remote file $remotePath, formatted by
# [BitConverter]::ToString, or $False when the file could not be downloaded.
#
# Results are cached in the script-level $checksums hashtable, so each remote
# file is downloaded (or queried) at most once.
#
# When $remoteChecksumAlg is not set, the file is downloaded to a temporary
# file and hashed locally with the script-level $sha1 object. Otherwise the
# checksum is requested from the server via $session.CalculateFileChecksum.
#
# Exceptions thrown by the session calls are not caught here; they propagate
# to the caller and nothing is cached for that file.
function FileChecksum ($remotePath)
{
    if (!($checksums.ContainsKey($remotePath)))
    {
        if ($remoteChecksumAlg -eq $Null)
        {
            Write-Host "Downloading file $remotePath..."
            # Download file
            # GetTempFileName creates the file on disk, so it has to be removed
            # on every path, including a failed or aborted transfer.
            $localPath = [System.IO.Path]::GetTempFileName()
            try
            {
                $transferResult = $session.GetFiles($remotePath, $localPath)

                if ($transferResult.IsSuccess)
                {
                    $stream = [System.IO.File]::OpenRead($localPath)
                    try
                    {
                        $checksum = [BitConverter]::ToString($sha1.ComputeHash($stream))
                    }
                    finally
                    {
                        # Release the file handle even if hashing fails,
                        # otherwise the temporary file could not be deleted.
                        $stream.Dispose()
                    }
                    Write-Host "Downloaded file $remotePath checksum is $checksum"
                }
                else
                {
                    Write-Host ("Error downloading file ${remotePath}: " +
                        $transferResult.Failures[0])
                    # Sentinel value marking a file whose checksum is unknown
                    $checksum = $False
                }
            }
            finally
            {
                if (Test-Path -LiteralPath $localPath)
                {
                    Remove-Item -LiteralPath $localPath
                }
            }
        }
        else
        {
            Write-Host "Request checksum for file $remotePath..."
            $buf = $session.CalculateFileChecksum($remoteChecksumAlg, $remotePath)
            $checksum = [BitConverter]::ToString($buf)
            Write-Host "File $remotePath checksum is $checksum"
        }

        $checksums[$remotePath] = $checksum
    }

    return $checksums[$remotePath]
}
# Recursively scans the remote directory $remotePath for duplicate files.
#
# Every file is registered in the script-level $sizes hashtable (file size ->
# array of remote paths). When a file has the same size as files seen earlier,
# its checksum (see FileChecksum) is compared with theirs, and each match is
# recorded in the script-level $duplicates hashtable
# (path of the later file -> path of the matching earlier file).
#
# Any exception raised while processing the directory is reported with
# Write-Host and ends the processing of that directory only.
function FindDuplicatesInDirectory ($remotePath)
{
    Write-Host "Finding duplicates in directory $remotePath ..."

    try
    {
        $directoryInfo = $session.ListDirectory($remotePath)

        # Drop trailing slashes so that joining with "/" below never produces
        # a double slash (for example when scanning the root directory "/").
        $basePath = ([string]$remotePath).TrimEnd('/')

        foreach ($fileInfo in $directoryInfo.Files)
        {
            $remoteFilePath = ($basePath + "/" + $fileInfo.Name)

            if ($fileInfo.IsDirectory)
            {
                # Skip references to current and parent directories
                if (($fileInfo.Name -ne ".") -and
                    ($fileInfo.Name -ne ".."))
                {
                    # Recurse into subdirectories
                    FindDuplicatesInDirectory $remoteFilePath
                }
            }
            else
            {
                Write-Host ("Found file $($fileInfo.FullName) " +
                    "with size $($fileInfo.Length)")

                if ($sizes.ContainsKey($fileInfo.Length))
                {
                    $checksum = FileChecksum $remoteFilePath

                    # FileChecksum returns $False (not a string) when the file
                    # could not be downloaded. Such a file must not be compared:
                    # two failed files would otherwise look identical.
                    if ($checksum -is [string])
                    {
                        foreach ($otherFilePath in $sizes[$fileInfo.Length])
                        {
                            $otherChecksum = FileChecksum $otherFilePath

                            if (($otherChecksum -is [string]) -and
                                ($checksum -eq $otherChecksum))
                            {
                                Write-Host ("Checksums of files $remoteFilePath and " +
                                    "$otherFilePath are identical")
                                $duplicates[$remoteFilePath] = $otherFilePath
                            }
                        }
                    }
                }
                else
                {
                    $sizes[$fileInfo.Length] = @()
                }

                # Register the file so that later files of the same size are
                # compared against it.
                $sizes[$fileInfo.Length] += $remoteFilePath
            }
        }
    }
    catch [Exception]
    {
        Write-Host "Error processing directory ${remotePath}: $($_.Exception.Message)"
    }
}
# Main script body: connects to the server, scans the remote tree starting at
# $remotePath and prints the duplicates found.
# Exits with code 0 on success and 1 when an exception reaches this level.
try
{
    # Load WinSCP .NET assembly
    Add-Type -Path "WinSCPnet.dll"

    # Setup session options from URL
    $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)

    $session = New-Object WinSCP.Session
    $session.SessionLogPath = "session.log"

    # Created only after a successful connect; stays $Null otherwise
    $sha1 = $Null

    try
    {
        # Connect
        $session.Open($sessionOptions)

        # Shared state read and updated by FileChecksum and
        # FindDuplicatesInDirectory:
        #   $sizes       file size   -> array of remote paths with that size
        #   $checksums   remote path -> cached checksum
        #   $duplicates  remote path -> path of an identical file seen earlier
        $sizes = @{}
        $checksums = @{}
        $duplicates = @{}

        $sha1 = [System.Security.Cryptography.SHA1]::Create()

        # Start recursion
        FindDuplicatesInDirectory $remotePath
    }
    finally
    {
        # Disconnect, clean up
        $session.Dispose()

        if ($sha1 -ne $Null)
        {
            $sha1.Dispose()
        }
    }

    # Print results
    Write-Host

    if ($duplicates.Count -gt 0)
    {
        Write-Host "Duplicates found:"

        foreach ($path1 in $duplicates.Keys)
        {
            Write-Host "$path1 <=> $($duplicates[$path1])"
        }
    }
    else
    {
        Write-Host "No duplicates found."
    }

    exit 0
}
catch [Exception]
{
    Write-Host "Error: $($_.Exception.Message)"
    exit 1
}
Uma versão atualizada e aprimorada do script está disponível como a extensão do WinSCP "Encontre arquivos duplicados no servidor SFTP/FTP".
(eu sou o autor do WinSCP)