Eu recebi uma caixa hoje que não estava voltando de uma reinicialização. Depois de um bom bocado de trabalho com discos vivos / de resgate, me deparei com a situação em que estou agora preso. Basicamente, várias ferramentas de baixo nível ( ls, grep, etc ) são segmentadas - o que uma reinstalação corrige, mas continua revertendo.
Um dos vários programas de segfaulting é o grep. Um exemplo aleatório:
$ grep eth0 /etc/sysconfig/network-scripts/*
Segmentation fault
No entanto, uma reinstalação do pacote grep resolve o problema:
$ yum reinstall grep
Loaded plugins: fastestmirror
Setting up Reinstall Process
Loading mirror speeds from cached hostfile
[...]
Installed:
grep.i386 0:2.5.1-55.el5
Complete!
$ grep eth0 /etc/sysconfig/network-scripts/*
/etc/sysconfig/network-scripts/ifcfg-eth0:DEVICE=eth0
[...]
Mas quando a caixa é reiniciada, tudo é quebrado novamente! Eu posso até replicar isso simplesmente trocando os níveis de execução.
$ init 4
$ grep eth0 /etc/sysconfig/network-scripts/*
Segmentation fault
Posso repetir minha correção de reinstalação, mas depois alternar bhack para runlevels 5 e isso acontece novamente.
Eu incluí uma cópia de um strace para o comando grep abaixo, mas como eu digo, o efeito "ls" também, que eu também consertei com uma reinstalação do coreutils.
execve("//bin/grep", ["grep", "eth0", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", ...], [/* 24 vars */]) = 0
brk(0) = 0x9bd0000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=29251, ...}) = 0
mmap2(NULL, 29251, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fe2000
close(3) = 0
open("/lib/libpcre.so.0", O_RDONLY) = 3
read(3, "7ELF$ grep eth0 /etc/sysconfig/network-scripts/*
Segmentation fault
$ yum reinstall grep
Loaded plugins: fastestmirror
Setting up Reinstall Process
Loading mirror speeds from cached hostfile
[...]
Installed:
grep.i386 0:2.5.1-55.el5
Complete!
$ grep eth0 /etc/sysconfig/network-scripts/*
/etc/sysconfig/network-scripts/ifcfg-eth0:DEVICE=eth0
[...]
$ init 4
$ grep eth0 /etc/sysconfig/network-scripts/*
Segmentation fault
execve("//bin/grep", ["grep", "eth0", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", "/etc/sysconfig/network-scripts/i", ...], [/* 24 vars */]) = 0
brk(0) = 0x9bd0000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=29251, ...}) = 0
mmap2(NULL, 29251, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fe2000
close(3) = 0
open("/lib/libpcre.so.0", O_RDONLY) = 3
read(3, "7ELF%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%04%pre%%pre%%pre%"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=117448, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fe1000
mmap2(NULL, 116176, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x1d3000
mmap2(0x1ef000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c) = 0x1ef000
close(3) = 0
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "7ELF%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%0_%pre%04%pre%%pre%%pre%"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1686224, ...}) = 0
mmap2(NULL, 1410500, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x8aa000
mmap2(0x9fd000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x152) = 0x9fd000
mmap2(0xa00000, 9668, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xa00000
close(3) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fe0000
set_thread_area({entry_number:-1 -> 6, base_addr:0xb7fe06c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
mprotect(0x9fd000, 8192, PROT_READ) = 0
mprotect(0x818000, 4096, PROT_READ) = 0
munmap(0xb7fe2000, 29251) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%04%pre%%pre%%pre%"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=117448, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fe1000
mmap2(NULL, 116176, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x1d3000
mmap2(0x1ef000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c) = 0x1ef000
close(3) = 0
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "7ELF%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%%pre%0_%pre%04%pre%%pre%%pre%"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1686224, ...}) = 0
mmap2(NULL, 1410500, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x8aa000
mmap2(0x9fd000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x152) = 0x9fd000
mmap2(0xa00000, 9668, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xa00000
close(3) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fe0000
set_thread_area({entry_number:-1 -> 6, base_addr:0xb7fe06c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
mprotect(0x9fd000, 8192, PROT_READ) = 0
mprotect(0x818000, 4096, PROT_READ) = 0
munmap(0xb7fe2000, 29251) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
Alguém tem algumas ideias inteligentes sobre o que está acontecendo? Não pretendo confiar nessa caixa (hardware de software), mas quero chegar ao fundo disso.