#
# Example config file for mcelog
# mcelog is the user space backend that decodes and processes machine check
# events (cpu hardware errors) reported by the CPU to the kernel
#

# general format
#optionname = value
# white space is not allowed in value currently, except at the end where it is dropped
#

# in general all command line options that are not commands work here
# see man mcelog or mcelog --help for a list
# e.g. to enable the --no-syslog option use
#no-syslog = yes (or no to disable)
# when the option has an argument
#logfile = /tmp/logfile

# by default, disable extended error logging on newer Intel processors
no-imc-log = yes

# below are the options which are not command line options

# Set CPU type for which mcelog decodes events:
#cpu = type
# for valid values for type please see mcelog --help
# If this value is set incorrectly the decoded output will likely be incorrect.
# by default when this parameter is not set mcelog uses the CPU it is running on
# on very new kernels the mcelog events reported by the kernel also carry
# the CPU type which is used too when available and not overridden.

# Enable daemon mode:
#daemon = yes
# By default mcelog just processes the currently pending events and exits.
# in daemon mode it will keep running as a daemon in the background and poll
# the kernel for events and then decode them.

# Filter out known broken events by default
filter = yes
# don't log memory errors individually
# they still get accounted if that is enabled
#filter-memory-errors = yes

# output in undecoded raw format to be easier machine readable
# (default is decoded)
#raw = yes

# Set CPU MHz to decode uptime from time stamp counter (output
# unreliable, not needed on new kernels which report the event time
# directly). A lot of systems don't have a linear time stamp clock
# and the output is wrong then.
# Normally mcelog tries to figure out if the TSC is reliable
# and only uses the current frequency then.
# Setting a frequency forces timestamp decoding.
# This setting is obsolete with modern kernels which report the time
# directly.
#cpumhz = 1800.00

# log output options
# Log decoded machine checks in syslog (default stdout or syslog for daemon)
#syslog = yes
# Log decoded machine checks in syslog with error level
#syslog-error = yes
# Never log anything to syslog
#no-syslog = yes
# Append log output to logfile instead of stdout. Only when no syslog logging is active
#logfile = filename

# Use SMBIOS information to decode DIMMs (needs root)
# This function is not recommended to use right now and generally not needed
# The exception is memdb prepopulation, which is configured separately below.
#dmi = no

# when in daemon mode run as this user after set up
# note that the triggers will run as this user too
# setting this to non root will mean that triggers cannot take some corrective
# action, like offlining objects
#run-credentials-user = root

# group to run as daemon with
# default to the group of the run-credentials-user
#run-credentials-group = nobody

[server]
# user allowed to access client socket.
# when set to * match any
# root is always allowed to access
# default: root only
client-user = root
# group allowed to access mcelog
# when no group is configured any group matches (but still user checking)
# when set to * match any
#client-group = root
# path to the unix socket for client<->server communication
# when no socket-path is configured the server will not start
#socket-path = /var/run/mcelog-client
# when mcelog starts it checks if a server is already running. timeout
# for this check.
#initial-ping-timeout = 2

[dimm]
# Is the in memory DIMM error tracking enabled?
# Only works on systems with integrated memory controller and
# which are supported
# Only takes effect in daemon mode
dimm-tracking-enabled = yes
# Use DMI information from the BIOS to prepopulate DIMM database
# Note this might not work with all BIOS and requires mcelog to run as root.
# Alternative is to let mcelog create DIMM objects on demand.
dmi-prepopulate = yes
#
# execute these triggers when the rate of corrected or uncorrected
# errors per DIMM exceeds the threshold
# Note when the hardware does not report DIMMs this might also
# be per channel
# The default of 10/24h is reasonable for server quality
# DDR3 DIMMs as of 2009/10
#uc-error-trigger = dimm-error-trigger
uc-error-threshold = 1 / 24h
#ce-error-trigger = dimm-error-trigger
ce-error-threshold = 10 / 24h

[socket]
# Memory error accounting per socket
socket-tracking-enabled = yes

# Threshold and trigger for uncorrected memory errors on a socket
# mem-uc-error-trigger = socket-memory-error-trigger
mem-uc-error-threshold = 100 / 24h

# Threshold and trigger for corrected memory errors on a socket
#mem-ce-error-trigger = socket-memory-error-trigger
#mem-ce-error-threshold = 100 / 24h

# Log socket error threshold explicitly?
#mem-ce-error-log = yes

[cache]
# Processing of cache error thresholds reported by Intel CPUs
#cache-threshold-trigger = cache-error-trigger

# Should cache threshold events be logged explicitly?
#cache-threshold-log = yes

[page]
# Memory error accounting per 4K memory page
# Threshold for the corrected memory errors trigger script
memory-ce-threshold = 10 / 24h

# Trigger script for corrected errors
# memory-ce-trigger = page-error-trigger

# Should page threshold events be logged explicitly?
memory-ce-log = yes

# specify the internal action in mcelog to exceeding a page error threshold
# this is done in addition to executing the trigger script if available
# off             no action
# account         only account errors
# soft            try to soft-offline page without killing any processes
#                 This requires an up-to-date kernel. Might not be successful.
# hard            try to hard-offline page by killing processes
#                 Requires an up-to-date kernel. Might not be successful.
# soft-then-hard  First try to soft offline, then try hard offlining
#memory-ce-action = off|account|soft|hard|soft-then-hard
memory-ce-action = soft

[trigger]
# Maximum number of running triggers
children-max = 2
# execute triggers in this directory
directory = /etc/mcelog