A newer version of this documentation is available.

View Latest

Monitoring

      +

      Sync Gateway 2.5 includes a number of new metrics available through the Admin REST API (/_expvar). These new metrics cover performance, resource utilization, and health checks of the Sync Gateway nodes, which are increasingly important as deployments scale to support a large number of connected Couchbase Lite clients.

      The following curl command requests the monitoring stats on the Admin REST API.

      curl -X GET "http://localhost:4985/_expvar" -H "accept: application/json"

      The response is a JSON object with the following schema.

      {
        "syncgateway": {
          "global": {
            "resource_utilization": {
              "admin_net_bytes_recv": 0,
              "admin_net_bytes_sent": 0,
              "error_count": 0,
              "go_memstats_heapalloc": 0,
              "go_memstats_heapidle": 0,
              "go_memstats_heapinuse": 0,
              "go_memstats_heapreleased": 0,
              "go_memstats_pausetotalns": 0,
              "go_memstats_stackinuse": 0,
              "go_memstats_stacksys": 0,
              "go_memstats_sys": 0,
              "goroutines_high_watermark": 0,
              "num_goroutines": 0,
              "process_cpu_percent_utilization": 0,
              "process_memory_resident": 0,
              "pub_net_bytes_recv": 0,
              "pub_net_bytes_sent": 0,
              "system_memory_total": 0,
              "warn_count": 0
            }
          },
          "per_db": {
            "$dbname": {
              "cache": {
                "chan_cache_active_revs": 0,
                "chan_cache_hits": 0,
                "chan_cache_max_entries": 0,
                "chan_cache_misses": 0,
                "chan_cache_num_channels": 0,
                "chan_cache_removal_revs": 0,
                "chan_cache_tombstone_revs": 0,
                "num_skipped_seqs": 0,
                "rev_cache_hits": 0,
                "rev_cache_misses": 0
              },
              "cbl_replication_pull": {
                "attachment_pull_bytes": 0,
                "attachment_pull_count": 0,
                "dcp_caching_count": 0,
                "dcp_caching_time": 0,
                "max_pending": 0,
                "num_pull_repl_active_continuous": 0,
                "num_pull_repl_active_one_shot": 0,
                "num_pull_repl_caught_up": 0,
                "num_pull_repl_since_zero": 0,
                "num_pull_repl_total_continuous": 0,
                "num_pull_repl_total_one_shot": 0,
                "request_changes_count": 0,
                "request_changes_time": 0,
                "rev_processing_time": 0,
                "rev_send_count": 0,
                "rev_send_latency": 0
              },
              "database": {
                "crc32c_match_count": 0,
                "dcp_caching_count": 0,
                "dcp_caching_time": 0,
                "dcp_received_count": 0,
                "dcp_received_time": 0,
                "doc_reads_bytes_blip": 0,
                "doc_writes_bytes": 0,
                "doc_writes_bytes_blip": 0,
                "num_doc_reads_blip": 0,
                "num_doc_reads_rest": 0,
                "num_doc_writes": 0,
                "num_replications_active": 0,
                "num_replications_total": 0,
                "sequence_get_count": 0,
                "sequence_released_count": 0,
                "sequence_reserved_count": 0,
                "warn_channels_per_doc_count": 0,
                "warn_grants_per_doc_count": 0,
                "warn_xattr_size_count": 0
              },
              "security": {
                "auth_failed_count": 0,
                "auth_success_count": 0,
                "num_access_errors": 0,
                "num_docs_rejected": 0,
                "total_auth_time": 0
              }
            }
          },
          "per_replication": {
            "$replname": {
              "sgr_docs_checked_sent": 0,
              "sgr_num_attachments_transferred": 0,
              "sgr_num_attachment_bytes_transferred": 0,
              "sgr_num_docs_failed_to_push": 0,
              "sgr_num_docs_pushed": 0
            }
          }
        }
      }

      syncgateway

      Monitoring stats

      global

      Global Sync Gateway stats

      resource_utilization

      Resource utilization stats

      admin_net_bytes_recv

      Total number of bytes received on the network interface that the Sync Gateway admin interface is bound to, since start-up.

      By default, that is the number of bytes received on 127.0.0.1 since start-up.

      Throughput on admin interface = admin_net_bytes_recv / admin_net_bytes_sent

      admin_net_bytes_sent

      Total number of bytes sent on the network interface that the Sync Gateway admin interface is bound to, since start-up.

      By default, that is the number of bytes sent on 127.0.0.1 since start-up.

      Throughput on admin interface = admin_net_bytes_recv / admin_net_bytes_sent

      error_count

      Number of errors logged.

      go_memstats_heapalloc

      Go memstats.HeapAlloc

      go_memstats_heapidle

      Go memstats.HeapIdle

      go_memstats_heapinuse

      Go memstats.HeapInuse

      go_memstats_heapreleased

      Go memstats.HeapReleased

      go_memstats_pausetotalns

      Go memstats.PauseTotalNs

      go_memstats_stackinuse

      Go memstats.StackInuse

      go_memstats_stacksys

      Go memstats.StackSys

      go_memstats_sys

      Go memstats.Sys

      goroutines_high_watermark

      Peak number of goroutines since process start.

      num_goroutines

      Number of goroutines.

      process_cpu_percent_utilization

      CPU utilization (%).

      The CPU usage calculation is performed based on user and system CPU time and doesn’t include components such as iowait. Therefore, process_cpu_percent_utilization differs from the %Cpu value returned when running the top command.

      process_memory_resident

      Memory utilization (Resident Set Size) for the process in bytes.

      pub_net_bytes_recv

      Total number of bytes received on the network interface that the Sync Gateway public interface is bound to, since start-up.

      By default, that is the number of bytes received on 0.0.0.0 since start-up.

      Throughput on public interface = pub_net_bytes_recv / pub_net_bytes_sent

      pub_net_bytes_sent

      Total number of bytes sent on the network interface that the Sync Gateway public interface is bound to, since start-up.

      By default, that is the number of bytes sent on 0.0.0.0 since start-up.

      Throughput on public interface = pub_net_bytes_recv / pub_net_bytes_sent

      system_memory_total

      Total memory available on the system in bytes.

      warn_count

      Number of warnings logged.

      per_db

      Stats for each database declared in the config file

      $dbname

      Stats relative to a database declared in the config file.

      cache

      Stats relative to caching

      abandoned_seqs

      The number of skipped sequences that were not found after 60 minutes and were abandoned.

      chan_cache_active_revs

      The number of active revisions in the channel cache.

      chan_cache_hits

      Channel cache requests fully served by the cache.

      Channel Cache Hit Ratio = chan_cache_hits / (chan_cache_hits + chan_cache_misses)

      chan_cache_max_entries

      Size of the largest channel cache.

      Helps with channel cache tuning, and as a hint on cache size variation (when compared to average cache size).

      chan_cache_misses

      Channel cache requests not fully served by the cache.

      Channel Cache Hit Ratio = chan_cache_hits / (chan_cache_hits + chan_cache_misses)

      chan_cache_num_channels

      Number of channels being cached.

      Provides insight into the potential maximum cache size (number of channels * max_cache_size), as well as node usage.

      chan_cache_removal_revs

      The number of removal revisions in the channel cache.

      Acts as a reminder that removals must be considered when tuning the channel cache size. Also helps users understand whether they should be tuning tombstone retention policy (metadata purge interval), and running compact.

      chan_cache_tombstone_revs

      The number of tombstone revisions in the channel cache.

      Acts as a reminder that tombstones and removals must be considered when tuning the channel cache size. Also helps users understand whether they should be tuning tombstone retention policy (metadata purge interval), and running compact.

      num_skipped_seqs

      Number of skipped sequences.

      Helps with channel cache tuning, and as a hint on cache size variation (when compared to average cache size).

      rev_cache_hits

      Revision cache hits.

      Rev Cache Hit Ratio = rev_cache_hits / (rev_cache_hits + rev_cache_misses)

      rev_cache_misses

      Revision cache misses.

      Rev Cache Hit Ratio = rev_cache_hits / (rev_cache_hits + rev_cache_misses)

      cbl_replication_pull

      attachment_pull_bytes

      Total size of attachments pulled, in bytes. This is the pre-compressed size. Divide by attachment_pull_count to obtain the average attachment size.

      attachment_pull_count

      Number of attachments pulled.

      dcp_caching_count

      This metric can be used to calculate the time between seeing a change on the DCP feed and when it’s available in the channel cache.

      DCP cache latency = dcp_caching_time / dcp_caching_count

      dcp_caching_time

      This metric can be used to calculate the time between seeing a change on the DCP feed and when it’s available in the channel cache.

      DCP cache latency = dcp_caching_time / dcp_caching_count

      max_pending

      High watermark for number of documents buffered during feed processing, waiting on a missing earlier sequence.

      num_pull_repl_active_continuous

      Gauge representing the number of continuous pull replications in the active state.

      num_pull_repl_active_one_shot

      Gauge representing the number of one-shot pull replications in the active state.

      num_pull_repl_caught_up

      Gauge representing the number of replications which have caught up to the latest changes.

      num_pull_repl_since_zero

      Number of new replications starting per second (/_changes?since=0).

      num_pull_repl_total_continuous

      Gauge representing the number of continuous pull replications.

      num_pull_repl_total_one_shot

      Gauge representing the number of one-shot pull replications.

      request_changes_count

      This metric can be used to calculate the latency of _changes requests.

      _changes request latency = request_changes_time / request_changes_count

      request_changes_time

      This metric can be used to calculate the latency of _changes requests.

      _changes request latency = request_changes_time / request_changes_count

      rev_processing_time

      The total amount of time processing revisions.

      This metric can be used with rev_send_count to calculate the average processing time per revision.

      average processing time per revision = rev_processing_time / rev_send_count.

      rev_send_count

      The total number of revisions sent.

      This metric can be used with rev_processing_time to calculate the average processing time per revision.

      average processing time per revision = rev_processing_time / rev_send_count.

      rev_send_latency

      In a pull replication, Sync Gateway sends a /_changes request to the client. The client responds with the list of revisions that it wants to receive.

      rev_send_latency measures the time between the client asking for some revisions via the /_changes response, and Sync Gateway sending those revisions to the client.

      Measuring time from the /_changes response means that this stat will vary significantly depending on the changes batch size. A larger batch size will result in a spike of this stat, even if the processing time per revision is unchanged. A more useful stat might be the average processing time per revision (rev_processing_time / rev_send_count).

      database

      Stats relative to the database

      crc32c_match_count

      Count of instances during import when the document cas had changed, but the document body was not changed.

      dcp_caching_count

      Count of DCP mutations added to Sync Gateway’s channel cache. Can be used with dcp_caching_time to monitor cache processing latency.

      dcp_caching_time

      Time between DCP mutation arriving at Sync Gateway and being added to channel cache (aggregate).

      dcp_received_count

      Number of document mutations received by Sync Gateway over DCP.

      dcp_received_time

      Time between document write and document being received by Sync Gateway over DCP. If the document was written prior to Sync Gateway starting the feed, it is measured as the time since the feed was started. Can be used to monitor DCP feed processing latency.

      doc_reads_bytes_blip

      Total number of bytes read via Couchbase Lite 2.x replication since Sync Gateway startup.

      doc_writes_bytes

      Total number of bytes written as part of document writes since Sync Gateway startup.

      doc_writes_bytes_blip

      Total number of bytes written as part of Couchbase Lite 2.x document writes since Sync Gateway startup.

      num_doc_reads_blip

      Count of the number of documents read via Couchbase Lite 2.x replication since Sync Gateway startup.

      num_doc_reads_rest

      Count of the number of documents read via the REST API since Sync Gateway startup. Includes Couchbase Lite 1.x replication.

      num_doc_writes

      Count of the number of documents written via any means since Sync Gateway startup.

      num_replications_active

      Approximate number of active replications. Only counts continuous pull replications.

      num_replications_total

      Count of the number of replications created since Sync Gateway startup.

      sequence_get_count

      Number of high sequence lookups.

      sequence_released_count

      Number of unused, reserved sequences released by Sync Gateway.

      sequence_reserved_count

      Number of sequences reserved by Sync Gateway.

      security

      Stats relative to security

      auth_failed_count

      Number of unsuccessful authentications. Useful to monitor the number of authentication errors.

      auth_success_count

      Number of successful authentications. Useful to monitor the number of authenticated requests.

      num_access_errors

      Count of documents rejected by write access functions (requireAccess/requireRole/requireUser).

      num_docs_rejected

      Count of documents rejected by the sync function. Useful to debug sync function issues and identify unexpected incoming documents.

      total_auth_time

      Total time it took to authenticate the last incoming request.

      per_replication

      Stats for each replication between Sync Gateway instances declared in the config file.

      $replname

      Stats relative to a replication between two Sync Gateway instances declared in the config file.

      sgr_docs_checked_sent

      Number of documents checked for changes since start-up (via /{db}/_revs_diff).

      sgr_num_attachments_transferred

      Number of attachments transferred since start-up.

      sgr_num_attachment_bytes_transferred

      Number of attachment bytes transferred since start-up.

      sgr_num_docs_failed_to_push

      Number of documents that failed to be pushed since start-up.

      sgr_num_docs_pushed

      Number of documents that were pushed since start-up.