rsync bloqueado - relendo os mesmos dados de novo e de novo

1

Tenho um script de sincronização que transfere arquivos de um servidor de produção para um servidor de DR todos os dias via rsync.

No entanto, há alguns dias o script parece estar falhando.

Havia várias instâncias dele em execução, então matei todas e reiniciei o processo; no entanto, ele parece não fazer nada.

Ao rastrear o PID com strace, posso vê-lo relendo os mesmos dados repetidamente.

os comandos executados pelo wrapper são:

root     4052277 30.5  0.1  61540 19280 pts/0    S+   11:00   8:32 /opt/csm/64-bit/deps/1.0.6/bin/rsync --archive --compress --recursive --rsh=/usr/bin/ssh -i /root/.ssh/id_rsa_drsync --rsync-path=/usr/bin/rsync --verbose /app/PWC/ xm3p7050vmo://app/PWC
root     4052278  0.0  0.0  52184  6096 pts/0    S+   11:00   0:00 /usr/bin/ssh -i /root/.ssh/id_rsa_drsync xm3p7050vmo /usr/bin/rsync --server -vlogDtprze.iLsf . //app/PWC

Ao executar strace -p4052277, obtenho:

read(3, ",,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_"..., 262144) = 262144
read(3, ",N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,-1,D,,N"..., 262144) = 262144
read(3, ",D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22529353,D,88380,D,4,D,0174525.001.000.001,D,MC58119990"..., 262144) = 262144
read(3, " 19:54:21.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, ",N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:54:16.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,"..., 262144) = 262144
read(3, ",N/A,D,N/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, "7 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE"..., 262144) = 262144
read(3, ",D,86550,D,6,D,FX.FORW-0419518-L1-GBP,D,,N,,N,,N,,N,FX.FORW-0419518-L1-GBP,D,,N,,N,,N,,N,999,D,GBP,D,2108,D,999,D,N/A,D,"..., 262144) = 262144
read(3, "-Update: 07.07.2017 05:20:41,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,-1,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:21:17,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30111,D,30112,D,,N,,N,1"..., 262144) = 262144
read(3, ",,N,,N,10/29/2016 19:54:13.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N"..., 262144) = 262144
read(3, ",2105,D,999,D,N/A,D,100,D,CPT-001,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,"..., 262144) = 262144
read(3, "0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:21:05,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/"..., 262144) = 262144
read(3, "7.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:1"..., 262144) = 262144
read(3, "00000,D,pc_exe,D,07/09/2017 04:00:17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,"..., 262144) = 262144
read(3, "D,N/A,D,N/A,D,N/A,D,N/A,D,0,D,0,D,1,D,0,D,,N,03/14/2016 00:00:00.000000000,D,03/14/2018 00:00:00.000000000,D,,N,,N,,N,,N"..., 262144) = 262144
read(3, ":17.000000000,D,pc_exe,D,BASTITLE_T,D,wf_DWH_Global,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22534150,D,83337,D,6,D,FX.FORW-03"..., 262144) = 262144
read(3, ",N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:51,D,,N,,N,,N,,N,,N,,N,,N,30100,D,30107,D,30108,D,,N,,N,10/29/2016 00:00:00.000"..., 262144) = 262144
read(3, "00,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.0000000"..., 262144) = 262144
read(3, "N,,N,,N,,N,10/29/2016 19:54:00.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, ",22535333,D,82141,D,6,D,FX.FORW-0373675-L2-CHF,D,,N,,N,,N,,N,FX.FORW-0373675-L2-CHF,D,,N,,N,,N,,N,999,D,CHF,D,2108,D,999"..., 262144) = 262144
read(3, "N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,1,D,0,D,,N,,N,,N,"..., 262144) = 262144
read(3, "4795-L1-USD,D,,N,,N,,N,,N,999,D,USD,D,2108,D,999,D,N/A,D,100,D,CHG-210,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,2204,D,N/A,D,N/A,"..., 262144) = 262144
read(3, "BASE_TITLE,D,19141,D\n0,D,22536220,D,82668,D,6,D,FX.FORW-0385515-L1-USD,D,,N,,N,,N,,N,FX.FORW-0385515-L1-USD,D,,N,,N,,N,,"..., 262144) = 262144
read(3, "N,,N,,N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:53:33.000000000,D,101,D,,N,,N,0,D,0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N"..., 262144) = 262144
read(3, "2017 05:20:26,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N"..., 262144) = 262144
read(3, ",N,,N,,N,FX.FORW-0387703-L2-CHF,D,,N,,N,,N,,N,999,D,CHF,D,2108,D,999,D,N/A,D,100,D,CHG-210,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A"..., 262144) = 262144
read(3, "obal,D,s_DWH_T_BASE_TITLE,D,19141,D\n0,D,22537407,D,80668,D,4,D,0177715.001.000.756,D,MC5811999000010177715000426,D,,N,,N"..., 262144) = 262144
read(3, ",,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:05,D,,N,,N,,N,,N,,N,"..., 262144) = 262144
read(3, "N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2017 04:00:17.00"..., 262144) = 262144
read(3, "10/29/2016 00:00:00.000000000,D,0,D,,N,0,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,0,D,0,D,,N,,N,,N,,N,,N,10/29/2016 19:53:33.000"..., 262144) = 262144
read(3, "/A,D,N/A,D,N/A,D,2201,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,N/A,D,-1,D,0,D,1,D,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,"..., 262144) = 262144
read(3, "1,D,,N,,N,,N,,N,RTW-Update: 07.07.2017 05:20:03,D,,N,,N,,N,,N,,N,0,D,,N,30100,D,30101,D,30102,D,,N,,N,10/29/2016 00:00:0"..., 262144) = 262144
read(3, "0,D,0,D,,N,,N,,N,,N,,N,,N,0,D,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,,N,07/09/2017 04:00:17.000000000,D,pc_exe,D,07/09/2"..., 262144) = 262144
read(3, "9141,D\n0,D,22539492,D,77738,D,4,D,0171560.001.000.001,D,MC5811999000010171560000159,D,,N,,N,,N,0171560.001.000.001,D,,N,"..., 262144) = 262144

Ele fica apenas despejando esse mesmo bloco repetidas vezes (o sistema de arquivos tem cerca de 150 GB).

Como posso investigar por que ele está fazendo isso? (Estava funcionando perfeitamente antes.) Não posso reiniciar a máquina, pois ela está em produção.

Isso poderia estar relacionado a um link defeituoso ou a algo na pasta? Ou a um arquivo em uso? (Aparentemente nenhum está em uso, a julgar pelos processos em execução.) Há opções adicionais do rsync que eu poderia usar para corrigir isso?

Obrigado por suas dicas. Atenciosamente,

    
por olivierg 13.09.2017 / 11:32

2 respostas

0

Parece que meu sistema de arquivos chega a 100% de uso durante o rsync, e é provavelmente por isso que ele falha!

Vou providenciar a extensão dele e reexecutar o processo (também tive um erro de chave SSH).

    
por 13.09.2017 / 14:02
1

Eu tentaria primeiro verificar se o rsync está funcionando corretamente. Crie um pequeno diretório de teste com alguns arquivos na origem e sincronize apenas ele com o destino para ver se funciona. Adicione também as opções --stats --verbose.

Acredito que isso possa estar relacionado à rede. Você pode verificar e ver se há algum problema de rede?

    
por 13.09.2017 / 12:14