IMHO: Você tem duas perguntas:
how exactly are the inodes found, when the system is given a filepath
e
desire to grasp the way the b-trees are implemented in real life.
Talvez sua pergunta seja: como os inodes são armazenados (em uma b-tree) ou os dados que um inode faz referência. Se assim for, não posso responder isso.
No que diz respeito à primeira pergunta literal, a resposta, dependendo do sistema operacional, varia de not difficult
a opaque
. As chamadas clássicas do sistema (aberto, unlink) lerão o diretório procurando pela entrada do nome do arquivo - e irão iniciar (nos tempos modernos - com uma chamada para o opendir ()).
No clássico UNIX - o nome máximo do arquivo era 14 caracteres - que deixava 2 bytes para o inode (uma entrada de diretório era de 16 bytes). Não houve (e ainda não é afaik) uma organização b-tree de nomes de arquivos e inodes - a busca foi (e é?) Leitura serial do diretório. Isso cai na categoria simple
para examinar.
Mesmo em sistemas atuais - onde nomes de arquivos mais longos são permitidos - a aparência básica das entradas do diretório pode ser inalterada - 2 bytes (inode), 14 bytes (nome do arquivo inicial / nome completo do arquivo). (Pelo menos nos diretórios AIX - onde tudo - mesmo segue o adiom do UNIX: tudo é um arquivo, alguns são especiais.)
michael@x071:[/home/michael]ls -lia /tmp | head
total 287464
2 drwxrwxrwt 54 bin bin 36864 Jan 22 13:35 .
2 drwxr-xr-x 39 root system 4096 Jan 5 12:27 ..
5 drwxrwxrwt 2 root system 256 May 8 2013 .X11-unix
6 -rw-r----- 1 root system 0 May 23 2014 .ahafs.out.michael.10223652
7 -rw-r----- 1 root system 0 May 23 2014 .ahafs.out.michael.9502870
8 -r--r--r-- 1 root system 25 Jun 9 2013 .aix_ISMP_lock____.save
9 drwxrwxrwt 3 root system 4096 Dec 27 12:15 .com_ibm_tools_attach
62 -rw-r--r-- 1 root system 3124 Dec 27 11:21 .ctinst.log
63 -rw-r----- 1 michael felt 2578 Aug 16 2013 .htaccess
michael@x071:[/home/michael]od -dc /tmp | head -20
0000000 2 11776 0 0 0 0 0 0
michael@x067:~$ od -dc /tmp|head
od: /tmp: read error: Is a directory
0000000
002 . #define _D_NAME_MAX 255
struct dirent {
__ulong64_t d_offset; /* real off after this entry */
ino_t d_ino; /* inode number of entry */
ushort_t d_reclen; /* length of this record */
ushort_t d_namlen; /* length of string in d_name */
char d_name[_D_NAME_MAX+1]; /* name must be no longer than this */
/* redefine w/#define when name decided */
};
struct dirent
{
#ifndef __USE_FILE_OFFSET64
__ino_t d_ino;
__off_t d_off;
#else
__ino64_t d_ino;
__off64_t d_off;
#endif
unsigned short int d_reclen;
unsigned char d_type;
char d_name[256]; /* We must not include limits.h! */
};
/usr/include/dirent.h:
/* This is the data type of directory stream objects.
The actual structure is opaque to users. */
typedef struct __dirstream DIR;
extern DIR *opendir (__const char *__name) __nonnull ((1));
...
/* Read a directory entry from DIRP. Return a pointer to a 'struct
dirent' describing the entry, or NULL for EOF or error. The
storage returned may be overwritten by a later readdir call on the
same DIR stream.
If the Large File Support API is selected we have to use the
appropriate interface.
This function is a possible cancellation point and therefore not
marked with __THROW. */
#ifndef __USE_FILE_OFFSET64
extern struct dirent *readdir (DIR *__dirp) __nonnull ((1));
#else
# ifdef __REDIRECT
extern struct dirent *__REDIRECT (readdir, (DIR *__dirp), readdir64)
__nonnull ((1));
# else
# define readdir readdir64
# endif
#endif
#ifdef __USE_LARGEFILE64
extern struct dirent64 *readdir64 (DIR *__dirp) __nonnull ((1));
#endif
/*
* Definitions for library routines operating on directories.
*/
typedef struct _dirdesc {
#ifdef _ALL_SOURCE
int dd_fd; /* file descriptor of directory */
blksize_t dd_blksize; /* this filesystem's block size */
char *dd_buf; /* malloc'd buffer depending of fs bsize */
long dd_size; /* size of buffer */
long dd_flag; /* private flags for readdir, unused */
off_t dd_loc; /* logical(dirent) offset in directory */
off_t dd_curoff; /* real offset in directory corresponding
* to dd_loc */
#else
int __dd_fd; /* file descriptor of directory */
blksize_t __dd_blksize; /* this filesystem's block size */
char *__dd_buf; /* malloc'd buffer depending of fs bsize */
long __dd_size; /* size of buffer */
long __dd_flag; /* private flags for readdir, unused */
off_t __dd_loc; /* logical(dirent) offset in directory */
off_t __dd_curoff; /* real offset in directory corresponding
* to dd_loc */
#endif
#if defined(_THREAD_SAFE) && defined(_ALL_SOURCE)
void *dd_lock; /* for inter-thread locking */
#endif
} DIR;
...
extern DIR *opendir(const char *);
extern struct dirent *readdir(DIR *);
michael@x071:[/home/michael]ls -lia /tmp | head
total 287464
2 drwxrwxrwt 54 bin bin 36864 Jan 22 13:35 .
2 drwxr-xr-x 39 root system 4096 Jan 5 12:27 ..
5 drwxrwxrwt 2 root system 256 May 8 2013 .X11-unix
6 -rw-r----- 1 root system 0 May 23 2014 .ahafs.out.michael.10223652
7 -rw-r----- 1 root system 0 May 23 2014 .ahafs.out.michael.9502870
8 -r--r--r-- 1 root system 25 Jun 9 2013 .aix_ISMP_lock____.save
9 drwxrwxrwt 3 root system 4096 Dec 27 12:15 .com_ibm_tools_attach
62 -rw-r--r-- 1 root system 3124 Dec 27 11:21 .ctinst.log
63 -rw-r----- 1 michael felt 2578 Aug 16 2013 .htaccess
michael@x071:[/home/michael]od -dc /tmp | head -20
0000000 2 11776 0 0 0 0 0 0
michael@x067:~$ od -dc /tmp|head
od: /tmp: read error: Is a directory
0000000
002 . #define _D_NAME_MAX 255
struct dirent {
__ulong64_t d_offset; /* real off after this entry */
ino_t d_ino; /* inode number of entry */
ushort_t d_reclen; /* length of this record */
ushort_t d_namlen; /* length of string in d_name */
char d_name[_D_NAME_MAX+1]; /* name must be no longer than this */
/* redefine w/#define when name decided */
};
struct dirent
{
#ifndef __USE_FILE_OFFSET64
__ino_t d_ino;
__off_t d_off;
#else
__ino64_t d_ino;
__off64_t d_off;
#endif
unsigned short int d_reclen;
unsigned char d_type;
char d_name[256]; /* We must not include limits.h! */
};
/usr/include/dirent.h:
/* This is the data type of directory stream objects.
The actual structure is opaque to users. */
typedef struct __dirstream DIR;
extern DIR *opendir (__const char *__name) __nonnull ((1));
...
/* Read a directory entry from DIRP. Return a pointer to a 'struct
dirent' describing the entry, or NULL for EOF or error. The
storage returned may be overwritten by a later readdir call on the
same DIR stream.
If the Large File Support API is selected we have to use the
appropriate interface.
This function is a possible cancellation point and therefore not
marked with __THROW. */
#ifndef __USE_FILE_OFFSET64
extern struct dirent *readdir (DIR *__dirp) __nonnull ((1));
#else
# ifdef __REDIRECT
extern struct dirent *__REDIRECT (readdir, (DIR *__dirp), readdir64)
__nonnull ((1));
# else
# define readdir readdir64
# endif
#endif
#ifdef __USE_LARGEFILE64
extern struct dirent64 *readdir64 (DIR *__dirp) __nonnull ((1));
#endif
/*
* Definitions for library routines operating on directories.
*/
typedef struct _dirdesc {
#ifdef _ALL_SOURCE
int dd_fd; /* file descriptor of directory */
blksize_t dd_blksize; /* this filesystem's block size */
char *dd_buf; /* malloc'd buffer depending of fs bsize */
long dd_size; /* size of buffer */
long dd_flag; /* private flags for readdir, unused */
off_t dd_loc; /* logical(dirent) offset in directory */
off_t dd_curoff; /* real offset in directory corresponding
* to dd_loc */
#else
int __dd_fd; /* file descriptor of directory */
blksize_t __dd_blksize; /* this filesystem's block size */
char *__dd_buf; /* malloc'd buffer depending of fs bsize */
long __dd_size; /* size of buffer */
long __dd_flag; /* private flags for readdir, unused */
off_t __dd_loc; /* logical(dirent) offset in directory */
off_t __dd_curoff; /* real offset in directory corresponding
* to dd_loc */
#endif
#if defined(_THREAD_SAFE) && defined(_ALL_SOURCE)
void *dd_lock; /* for inter-thread locking */
#endif
} DIR;
...
extern DIR *opendir(const char *);
extern struct dirent *readdir(DIR *);
%pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre%
0000020 2 11822 0 0 0 0 0 0
%pre% 002 . . %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre%
0000040 32848 11895 28530 27492 26994 11825 13109 13878
200 P . w o r k d i r . 1 3 5 6 6
0000060 5 11864 12593 11637 28265 30720 0 0
%pre% 005 . X 1 1 - u n i x %pre% %pre% %pre% %pre% %pre%
0000100 6 11873 26721 26227 11887 30068 11885 26979
%pre% 006 . a h a f s . o u t . m i c
0000120 7 11873 26721 26227 11887 30068 11885 26979
%pre% \a . a h a f s . o u t . m i c
0000140 8 11873 27000 24393 21325 20575 27759 25451
%pre% \b . a i x _ I S M P _ l o c k
0000160 9 11875 28525 24425 25197 24436 28527 27763
%pre% \t . c o m _ i b m _ t o o l s
0000200 62 11875 29801 28275 29742 27759 26368 0
%pre% > . c t i n s t . l o g %pre% %pre% %pre%
0000220 63 11880 29793 25443 25971 29440 0 0
%pre% ? . h t a c c e s s %pre% %pre% %pre% %pre% %pre%
%pre% %pre% %pre% %pre% %pre% %pre% %pre%
0000020 2 11822 0 0 0 0 0 0
%pre% 002 . . %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre% %pre%
0000040 32848 11895 28530 27492 26994 11825 13109 13878
200 P . w o r k d i r . 1 3 5 6 6
0000060 5 11864 12593 11637 28265 30720 0 0
%pre% 005 . X 1 1 - u n i x %pre% %pre% %pre% %pre% %pre%
0000100 6 11873 26721 26227 11887 30068 11885 26979
%pre% 006 . a h a f s . o u t . m i c
0000120 7 11873 26721 26227 11887 30068 11885 26979
%pre% \a . a h a f s . o u t . m i c
0000140 8 11873 27000 24393 21325 20575 27759 25451
%pre% \b . a i x _ I S M P _ l o c k
0000160 9 11875 28525 24425 25197 24436 28527 27763
%pre% \t . c o m _ i b m _ t o o l s
0000200 62 11875 29801 28275 29742 27759 26368 0
%pre% > . c t i n s t . l o g %pre% %pre% %pre%
0000220 63 11880 29793 25443 25971 29440 0 0
%pre% ? . h t a c c e s s %pre% %pre% %pre% %pre% %pre%
Nota: tentei acima em um sistema Linux - e talvez eles agora estejam organizados como um b-tree. Não faço ideia - porque recebo a seguinte saída:
%pre%Portanto, classicamente: inode "lookup / mapping" para um arquivo (especial) foram apenas os primeiros dois bytes de uma entrada de diretório. Os inodes estavam no disco ou armazenados na memória em, por exemplo, uma árvore B.
No AIX - a estrutura 'dirent' é "fácil de encontrar" em:
%pre%E no Linux (kernel 3.2.X.Y) - o arquivo de inclusão inclui e a estrutura é:
%pre%Ambos - imho - são basicamente iguais - as diferenças são neutralizadas pela forma como são retornadas pela chamada readdir ()
Abordagem Linux / GNU:
%pre%%pre%Note: I was unable to find anything for __dirstream - so I added the 'comment' about it being
opaque
No AIX:
%pre% Resumindo - imho - dependendo do sistema operacional, a estrutura on disk
pode ser bem simples ou opaque
. No kernel, não me surpreende que a organização seja opaque
, então você pode modificar a organização sem precisar alterar nenhum código de aplicativo.