as chamadas mmap () do journalctl tornam incrivelmente lento

1

Sempre que executo journalctl , ele é executado de maneira incrivelmente lenta. Geralmente, leva minutos ou até horas para retornar resultados. Eu experimentei alguns outros problemas de desempenho na máquina afetada que pareciam estar relacionados a aplicativos intensivos de I / O; Eu recentemente substitui o cabo SATA que melhorou o desempenho, mas não resolveu completamente o problema. Eu corri S.M.A.R.T. testes em todas as unidades e eles saem bem, com base em esta lista de atributos "críticos" que prever falhas no disco.

Note que não terei acesso físico à máquina afetada por vários meses, infelizmente.

Eu corri journalctl -xn através de strace -T como root; aqui está uma amostra da saída:

fstatfs(23, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246662, f_bavail=9257376, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000044>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 24 <0.000045>
fstat(24, {st_mode=S_IFREG|0640, st_size=67108864, ...}) = 0 <0.000044>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 24, 0) = 0x7fce771b7000 <0.000052>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 24, 0) = 0x7fce6be4c000 <0.000101>
fstatfs(24, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246681, f_bavail=9257367, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000044>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 25 <0.000040>
fstat(25, {st_mode=S_IFREG|0640, st_size=83886080, ...}) = 0 <0.000040>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 25, 0) = 0x7fce771b6000 <0.000177>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 25, 0) = 0x7fce6b64c000 <0.000126>
fstatfs(25, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246807, f_bavail=9257509, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000071>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 26 <0.000073>
fstat(26, {st_mode=S_IFREG|0640, st_size=67108864, ...}) = 0 <0.000046>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 26, 0) = 0x7fce771b5000 <0.554027>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 26, 0) = 0x7fce6ae4c000 <0.000155>
fstatfs(26, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10248655, f_bavail=9257373, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000091>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 27 <0.000082>
fstat(27, {st_mode=S_IFREG|0640, st_size=8388608, ...}) = 0 <0.000064>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 27, 0) = 0x7fce771b4000 <0.129685>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 27, 0) = 0x7fce6a64c000 <0.000119>
fstatfs(27, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10245778, f_bavail=9256592, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000043>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 28 <0.000043>
fstat(28, {st_mode=S_IFREG|0640, st_size=109051904, ...}) = 0 <0.000047>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 28, 0) = 0x7fce771b3000 <2.106414>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 28, 0) = 0x7fce69e4c000 <0.000099>
fstatfs(28, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10247093, f_bavail=9257207, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000037>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 29 <0.000039>
fstat(29, {st_mode=S_IFREG|0640, st_size=50331648, ...}) = 0 <0.000034>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 29, 0) = 0x7fce771b2000 <0.000043>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 29, 0) = 0x7fce6964c000 <0.000026>
open("/dev/urandom", O_RDONLY|O_NOCTTY|O_CLOEXEC) = 30 <0.000019>
read(30, "_\f77004U7N@036", 16) = 16 <0.000010>
close(30)                               = 0 <0.000010>
open("/dev/urandom", O_RDONLY|O_NOCTTY|O_CLOEXEC) = 30 <0.000014>
read(30, "u7660t
% find /var/log/journal | wc -l
115
% uname -a
Linux steevie 4.9.0-0.bpo.5-amd64 #1 SMP Debian 4.9.65-3+deb9u2~bpo8+1 (2017-01-05) x86_64 GNU/Linux
% vmstat
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 0  0 1495592 160420     80 4586516  199  179 15827  1391   15    8  7  5 37 51  0
T*261w", 16) = 16 <0.000011> close(30) = 0 <0.000010> fstatfs(29, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246938, f_bavail=9257024, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000015> open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 30 <0.000014> fstat(30, {st_mode=S_IFREG|0640, st_size=58720256, ...}) = 0 <0.000010> mmap(NULL, 4096, PROT_READ, MAP_SHARED, 30, 0) = 0x7fce771b1000 <3.332606>

O processo sempre parece travar por um tempo em mmap() chamadas parecidas com a última. Eu usei dd if=/dev/zero of=testfile.tmp para fazer benchmark de E / S de disco. Ao fazer isso, observei a gravação em disco pairando em torno de 1.000 M / s (+/- 150 M / s na maior parte do tempo), embora ocasionalmente caísse para 300 M / s ou menos (indo até ~ 70 M / s). Quando eu matei, dd reportou 15689293824 bytes (16 GB) copied, 174.361 s, 90.0 MB/s . Curiosamente, ao executar o mesmo teste de trás para frente ( dd if=testfile.tmp of=/dev/null ), o desempenho de leitura do disco, conforme relatado por iotop , não ficou acima de 250 M / s, quase sempre abaixo de 100 M / s, e estava abaixo de 50 M / s s. Matando este segundo processo dd produzido 10573660160 bytes (11 GB) copied, 148.573 s, 71.2 MB/s .

Aqui estão mais algumas informações de depuração (observe que elas foram executadas depois que eu matei o processo journalctl -xn ):

% sudo btrfs filesystem show /
Label: '[root]'  uuid: 06c6565d-af6c-4123-9e42-3b6281f164e6
        Total devices 4 FS bytes used 990.93GiB
        devid    2 size 931.47GiB used 931.47GiB path /dev/mapper/sda2_crypt
        devid    4 size 109.45GiB used 109.45GiB path /dev/sda6
        devid    5 size 109.45GiB used 109.45GiB path /dev/sdb6
        devid    6 size 931.47GiB used 931.47GiB path /dev/sdc2

% sudo btrfs device usage /
/dev/mapper/sda2_crypt, ID: 2
   Device size:           931.47GiB
   Device slack:              0.00B
   Data,RAID1:            917.50GiB
   Metadata,RAID1:         13.94GiB
   System,RAID1:           32.00MiB
   Unallocated:             1.02MiB

/dev/sda6, ID: 4
   Device size:           109.45GiB
   Device slack:              0.00B
   Data,RAID1:            102.51GiB
   Metadata,RAID1:          6.94GiB
   Unallocated:             1.00MiB

/dev/sdb6, ID: 5
   Device size:           109.45GiB
   Device slack:              0.00B
   Data,RAID1:            104.42GiB
   Metadata,RAID1:          5.00GiB
   System,RAID1:           32.00MiB
   Unallocated:             3.00MiB

/dev/sdc2, ID: 6
   Device size:           931.47GiB
   Device slack:              0.00B
   Data,RAID1:            927.47GiB
   Metadata,RAID1:          4.00GiB
   Unallocated:             1.02MiB

Observe que o sistema de arquivos está em uma matriz RAID1 btrfs e um dos dispositivos está dentro de um contêiner LUKS (por motivos históricos):

fstatfs(23, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246662, f_bavail=9257376, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000044>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 24 <0.000045>
fstat(24, {st_mode=S_IFREG|0640, st_size=67108864, ...}) = 0 <0.000044>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 24, 0) = 0x7fce771b7000 <0.000052>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 24, 0) = 0x7fce6be4c000 <0.000101>
fstatfs(24, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246681, f_bavail=9257367, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000044>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 25 <0.000040>
fstat(25, {st_mode=S_IFREG|0640, st_size=83886080, ...}) = 0 <0.000040>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 25, 0) = 0x7fce771b6000 <0.000177>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 25, 0) = 0x7fce6b64c000 <0.000126>
fstatfs(25, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246807, f_bavail=9257509, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000071>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 26 <0.000073>
fstat(26, {st_mode=S_IFREG|0640, st_size=67108864, ...}) = 0 <0.000046>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 26, 0) = 0x7fce771b5000 <0.554027>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 26, 0) = 0x7fce6ae4c000 <0.000155>
fstatfs(26, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10248655, f_bavail=9257373, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000091>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 27 <0.000082>
fstat(27, {st_mode=S_IFREG|0640, st_size=8388608, ...}) = 0 <0.000064>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 27, 0) = 0x7fce771b4000 <0.129685>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 27, 0) = 0x7fce6a64c000 <0.000119>
fstatfs(27, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10245778, f_bavail=9256592, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000043>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 28 <0.000043>
fstat(28, {st_mode=S_IFREG|0640, st_size=109051904, ...}) = 0 <0.000047>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 28, 0) = 0x7fce771b3000 <2.106414>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 28, 0) = 0x7fce69e4c000 <0.000099>
fstatfs(28, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10247093, f_bavail=9257207, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000037>
open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 29 <0.000039>
fstat(29, {st_mode=S_IFREG|0640, st_size=50331648, ...}) = 0 <0.000034>
mmap(NULL, 4096, PROT_READ, MAP_SHARED, 29, 0) = 0x7fce771b2000 <0.000043>
mmap(NULL, 8388608, PROT_READ, MAP_SHARED, 29, 0) = 0x7fce6964c000 <0.000026>
open("/dev/urandom", O_RDONLY|O_NOCTTY|O_CLOEXEC) = 30 <0.000019>
read(30, "_\f77004U7N@036", 16) = 16 <0.000010>
close(30)                               = 0 <0.000010>
open("/dev/urandom", O_RDONLY|O_NOCTTY|O_CLOEXEC) = 30 <0.000014>
read(30, "u7660t
% find /var/log/journal | wc -l
115
% uname -a
Linux steevie 4.9.0-0.bpo.5-amd64 #1 SMP Debian 4.9.65-3+deb9u2~bpo8+1 (2017-01-05) x86_64 GNU/Linux
% vmstat
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 0  0 1495592 160420     80 4586516  199  179 15827  1391   15    8  7  5 37 51  0
T*261w", 16) = 16 <0.000011> close(30) = 0 <0.000010> fstatfs(29, {f_type=0x9123683e, f_bsize=4096, f_blocks=272871862, f_bfree=10246938, f_bavail=9257024, f_files=0, f_ffree=0, f_fsid={-1736151745, 782050752}, f_namelen=255, f_frsize=4096}) = 0 <0.000015> open("/var/log/journal/40b4c4d188900ed4b228417f53bce3a8/[email protected]~", O_RDONLY|O_CLOEXEC) = 30 <0.000014> fstat(30, {st_mode=S_IFREG|0640, st_size=58720256, ...}) = 0 <0.000010> mmap(NULL, 4096, PROT_READ, MAP_SHARED, 30, 0) = 0x7fce771b1000 <3.332606>

sda2_crypt aqui é enganoso; Na verdade, ele é apoiado por /dev/sdd2 . /dev/sda e /dev/sdb são SSDs; /dev/sdc e /dev/sdd são HDDs.

Eu estou no Debian 8. Feliz em postar smartctl logs, se necessário, mas eles são meio longos e é por isso que estou omitindo-os neste momento.

    
por strugee 20.01.2018 / 00:14

0 respostas