Falha na replicação do MongoDB

2

Estou tentando depurar um problema estranho que faz com que um membro do conjunto de réplicas falhe ou fique obsoleto (stale) e incapaz de alcançar o primário (em execução na AWS, AWS Linux).

O atraso típico de replicação é de < 1 seg.

Parece haver uma correlação entre o secundário ficar desatualizado e o primário ficar sem espaço para logs, ou seja, em /var/log/mongodb/.

O oplog é armazenado em uma unidade diferente, que tem mais de 50 GB de espaço livre.

Pergunta: a incapacidade do primário de gravar logs pode resultar em uma falha de replicação? Não consegui localizar nada na documentação ou on-line que confirme isso.

Edição para adicionar a saída de db.printReplicationInfo():

rs0:PRIMARY> db.printReplicationInfo()
configured oplog size:   30720MB
log length start to end: 2238secs (0.62hrs)
oplog first event time:  Mon Nov 09 2015 16:36:13 GMT+0000 (UTC)
oplog last event time:   Mon Nov 09 2015 17:13:31 GMT+0000 (UTC)
now:                     Mon Nov 09 2015 17:13:31 GMT+0000 (UTC)

Edição para adicionar a saída completa de db.oplog.rs.stats() no banco de dados local:

{
    "ns" : "local.oplog.rs",
    "count" : 197021,
    "size" : 32227028695,
    "avgObjSize" : 163571,
    "storageSize" : 11432701952,
    "capped" : true,
    "max" : -1,
    "maxSize" : NumberLong("32212254720"),
    "wiredTiger" : {
        "metadata" : {
            "formatVersion" : 1,
            "oplogKeyExtractionVersion" : 1
        },
        "creationString" : "allocation_size=4KB,app_metadata=(formatVersion=1,oplogKeyExtractionVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=0,checkpoint=(WiredTigerCheckpoint.44462=(addr=\"01e30119f281e455da0eb9e30d626b89e457acebd0e30d627488e4c14bdd50808080e502a9710fc0e502984f4fc0\",order=44462,time=1447092664,size=11145277440,write_gen=190060665)),checkpoint_lsn=(174452,45957760),checksum=on,collator=,columns=,dictionary=0,format=btree,huffman_key=,huffman_value=,id=9,internal_item_max=0,internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,prefix_compression_min=4,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,value_format=u,version=(major=1,minor=1)",
        "type" : "file",
        "uri" : "statistics:table:collection-6-8821970786560075304",
        "LSM" : {
            "bloom filters in the LSM tree" : 0,
            "bloom filter false positives" : 0,
            "bloom filter hits" : 0,
            "bloom filter misses" : 0,
            "bloom filter pages evicted from cache" : 0,
            "bloom filter pages read into cache" : 0,
            "total size of bloom filters" : 0,
            "sleep for LSM checkpoint throttle" : 0,
            "chunks in the LSM tree" : 0,
            "highest merge generation in the LSM tree" : 0,
            "queries that could have benefited from a Bloom filter that did not exist" : 0,
            "sleep for LSM merge throttle" : 0
        },
        "block-manager" : {
            "file allocation unit size" : 4096,
            "blocks allocated" : 190152428,
            "checkpoint size" : 11145277440,
            "allocations requiring file extension" : 225612,
            "blocks freed" : 189847759,
            "file magic number" : 120897,
            "file major version number" : 1,
            "minor version number" : 0,
            "file bytes available for reuse" : 93298688,
            "file size in bytes" : 11432701952
        },
        "btree" : {
            "btree checkpoint generation" : 44481,
            "column-store variable-size deleted values" : 0,
            "column-store fixed-size leaf pages" : 0,
            "column-store internal pages" : 0,
            "column-store variable-size leaf pages" : 0,
            "pages rewritten by compaction" : 0,
            "number of key/value pairs" : 0,
            "fixed-record size" : 0,
            "maximum tree depth" : 5,
            "maximum internal page key size" : 368,
            "maximum internal page size" : 4096,
            "maximum leaf page key size" : 3276,
            "maximum leaf page size" : 32768,
            "maximum leaf page value size" : 67108864,
            "overflow pages" : 0,
            "row-store internal pages" : 0,
            "row-store leaf pages" : 0
        },
        "cache" : {
            "bytes read into cache" : NumberLong("30740995584012"),
            "bytes written from cache" : NumberLong("29998939159917"),
            "checkpoint blocked page eviction" : 394628,
            "unmodified pages evicted" : 164578679,
            "page split during eviction deepened the tree" : 1173,
            "modified pages evicted" : 61796390,
            "data source pages selected for eviction unable to be evicted" : 10970507,
            "hazard pointer blocked page eviction" : 7932631,
            "internal pages evicted" : 3553083,
            "pages split during eviction" : 3825008,
            "in-memory page splits" : 15025,
            "overflow values cached in memory" : 0,
            "pages read into cache" : 175450372,
            "overflow pages read into cache" : 0,
            "pages written from cache" : 190063507
        },
        "compression" : {
            "raw compression call failed, no additional data available" : 0,
            "raw compression call failed, additional data available" : 0,
            "raw compression call succeeded" : 0,
            "compressed pages read" : 173302623,
            "compressed pages written" : 170315036,
            "page written failed to compress" : 22,
            "page written was too small to compress" : 19748044
        },
        "cursor" : {
            "create calls" : 4043133,
            "insert calls" : 230047485,
            "bulk-loaded cursor-insert calls" : 0,
            "cursor-insert key and value bytes inserted" : NumberLong("29150212316581"),
            "next calls" : 730226547,
            "prev calls" : 37,
            "remove calls" : 4040833,
            "cursor-remove key bytes removed" : 36367497,
            "reset calls" : 583047853,
            "search calls" : 328774626,
            "search near calls" : 5144225,
            "update calls" : 0,
            "cursor-update value bytes updated" : 0
        },
        "reconciliation" : {
            "dictionary matches" : 0,
            "internal page multi-block writes" : 79957,
            "leaf page multi-block writes" : 3847381,
            "maximum blocks required for a page" : 870,
            "internal-page overflow keys" : 0,
            "leaf-page overflow keys" : 0,
            "overflow values written" : 0,
            "pages deleted" : 9135126,
            "page checksum matches" : 2604439,
            "page reconciliation calls" : 62629072,
            "page reconciliation calls for eviction" : 60930354,
            "leaf page key bytes discarded using prefix compression" : 0,
            "internal page key bytes discarded using suffix compression" : 177193166
        },
        "session" : {
            "object compaction" : 0,
            "open cursor count" : 4043133
        },
        "transaction" : {
            "update conflicts" : 0
        }
    },
    "nindexes" : 0,
    "totalIndexSize" : 0,
    "indexSizes" : {

    },
    "ok" : 1,
    "$gleStats" : {
        "lastOpTime" : Timestamp(0, 0),
        "electionId" : ObjectId("5614f9ebaffca08a80d27834")
    }
}
    
por MarcF 09.11.2015 / 13:30

0 respostas

Tags