Bug Summary

File: src/sentinel.c
Warning: line 1992, column 14
Although the value stored to 'slave' is used in the enclosing expression, the value is never actually read from 'slave'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name sentinel.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D REDIS_STATIC= -I ../deps/hiredis -I ../deps/linenoise -I ../deps/lua/src -I ../deps/hdr_histogram -D USE_JEMALLOC -I ../deps/jemalloc/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-c11-extensions -Wno-missing-field-initializers -std=c11 -fdebug-compilation-dir /home/netto/Desktop/redis-6.2.1/src -ferror-limit 19 -fmessage-length 0 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /tmp/scan-build-2021-03-14-133648-8817-1 -x c sentinel.c
1/* Redis Sentinel implementation
2 *
3 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Redis nor the names of its contributors may be used
15 * to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "server.h"
32#include "hiredis.h"
33#ifdef USE_OPENSSL
34#include "openssl/ssl.h"
35#include "hiredis_ssl.h"
36#endif
37#include "async.h"
38
39#include <ctype.h>
40#include <arpa/inet.h>
41#include <sys/socket.h>
42#include <sys/wait.h>
43#include <fcntl.h>
44
45extern char **environ;
46
47#ifdef USE_OPENSSL
48extern SSL_CTX *redis_tls_ctx;
49extern SSL_CTX *redis_tls_client_ctx;
50#endif
51
52#define REDIS_SENTINEL_PORT26379 26379
53
54/* ======================== Sentinel global state =========================== */
55
56/* Address object, used to describe an ip:port pair.
 * Both strings are sds values owned by the object: they are created with
 * sdsnew() in createSentinelAddr()/dupSentinelAddr() and released with
 * sdsfree() in releaseSentinelAddr(). */
57typedef struct sentinelAddr {
58 char *hostname; /* Hostname OR address, as specified */
59 char *ip; /* Always a resolved address */
60 int port; /* createSentinelAddr() only accepts values in 0..65535 */
61} sentinelAddr;
62
63/* Flags (SRI_*) describing a Redis instance object that Sentinel is monitoring. */
64#define SRI_MASTER(1<<0) (1<<0)
65#define SRI_SLAVE(1<<1) (1<<1)
66#define SRI_SENTINEL(1<<2) (1<<2)
67#define SRI_S_DOWN(1<<3) (1<<3) /* Subjectively down (no quorum). */
68#define SRI_O_DOWN(1<<4) (1<<4) /* Objectively down (confirmed by others). */
69#define SRI_MASTER_DOWN(1<<5) (1<<5) /* A Sentinel with this flag set thinks that
70 its master is down. */
71#define SRI_FAILOVER_IN_PROGRESS(1<<6) (1<<6) /* Failover is in progress for
72 this master. */
73#define SRI_PROMOTED(1<<7) (1<<7) /* Slave selected for promotion. */
74#define SRI_RECONF_SENT(1<<8) (1<<8) /* SLAVEOF <newmaster> sent. */
75#define SRI_RECONF_INPROG(1<<9) (1<<9) /* Slave synchronization in progress. */
76#define SRI_RECONF_DONE(1<<10) (1<<10) /* Slave synchronized with new master. */
77#define SRI_FORCE_FAILOVER(1<<11) (1<<11) /* Force failover with master up. */
78#define SRI_SCRIPT_KILL_SENT(1<<12) (1<<12) /* SCRIPT KILL already sent on -BUSY */
79
80/* Note: times are in milliseconds. */
81#define SENTINEL_INFO_PERIOD10000 10000
82#define SENTINEL_PING_PERIOD1000 1000
83#define SENTINEL_ASK_PERIOD1000 1000
84#define SENTINEL_PUBLISH_PERIOD2000 2000
85#define SENTINEL_DEFAULT_DOWN_AFTER30000 30000
86#define SENTINEL_HELLO_CHANNEL"__sentinel__:hello" "__sentinel__:hello"
87#define SENTINEL_TILT_TRIGGER2000 2000
88#define SENTINEL_TILT_PERIOD(1000*30) (SENTINEL_PING_PERIOD1000*30)
89#define SENTINEL_DEFAULT_SLAVE_PRIORITY100 100
90#define SENTINEL_SLAVE_RECONF_TIMEOUT10000 10000
91#define SENTINEL_DEFAULT_PARALLEL_SYNCS1 1
92#define SENTINEL_MIN_LINK_RECONNECT_PERIOD15000 15000
93#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT(60*3*1000) (60*3*1000)
94#define SENTINEL_MAX_PENDING_COMMANDS100 100
95#define SENTINEL_ELECTION_TIMEOUT10000 10000
96#define SENTINEL_MAX_DESYNC1000 1000
97#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG1 1
98#define SENTINEL_DEFAULT_RESOLVE_HOSTNAMES0 0
99#define SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES0 0
100
101/* Failover machine different states. */
102#define SENTINEL_FAILOVER_STATE_NONE0 0 /* No failover in progress. */
103#define SENTINEL_FAILOVER_STATE_WAIT_START1 1 /* Wait for failover_start_time*/
104#define SENTINEL_FAILOVER_STATE_SELECT_SLAVE2 2 /* Select slave to promote */
105#define SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE3 3 /* Slave -> Master */
106#define SENTINEL_FAILOVER_STATE_WAIT_PROMOTION4 4 /* Wait slave to change role */
107#define SENTINEL_FAILOVER_STATE_RECONF_SLAVES5 5 /* SLAVEOF newmaster */
108#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG6 6 /* Monitor promoted slave. */
109
110#define SENTINEL_MASTER_LINK_STATUS_UP0 0
111#define SENTINEL_MASTER_LINK_STATUS_DOWN1 1
112
113/* Generic flags that can be used with different functions.
114 * They use higher bits to avoid colliding with the function specific
115 * flags. */
116#define SENTINEL_NO_FLAGS0 0
117#define SENTINEL_GENERATE_EVENT(1<<16) (1<<16)
118#define SENTINEL_LEADER(1<<17) (1<<17)
119#define SENTINEL_OBSERVER(1<<18) (1<<18)
120
121/* Script execution flags and limits. */
122#define SENTINEL_SCRIPT_NONE0 0
123#define SENTINEL_SCRIPT_RUNNING1 1
124#define SENTINEL_SCRIPT_MAX_QUEUE256 256
125#define SENTINEL_SCRIPT_MAX_RUNNING16 16
126#define SENTINEL_SCRIPT_MAX_RUNTIME60000 60000 /* 60 seconds max exec time. */
127#define SENTINEL_SCRIPT_MAX_RETRY10 10
128#define SENTINEL_SCRIPT_RETRY_DELAY30000 30000 /* 30 seconds between retries. */
129
130/* SENTINEL SIMULATE-FAILURE command flags. */
131#define SENTINEL_SIMFAILURE_NONE0 0
132#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION(1<<0) (1<<0)
133#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION(1<<1) (1<<1)
134
135/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
136 * monitoring many masters, we have different instances representing the
137 * same Sentinels, one per master, and we need to share the hiredis connections
138 * among them. Otherwise if 5 Sentinels are monitoring 100 masters we create
139 * 500 outgoing connections instead of 5.
140 *
141 * So this structure represents a reference counted link in terms of the two
142 * hiredis connections for commands and Pub/Sub, and the fields needed for
143 * failure detection, since the ping/pong times are now local to the link: if
144 * the link is available, the instance is available. This way we don't just
145 * have 5 connections instead of 500, we also send 5 pings instead of 500.
146 *
147 * Links are shared only for Sentinels: master and slave instances have
148 * a link with refcount = 1, always. */
149typedef struct instanceLink {
150 int refcount; /* Number of sentinelRedisInstance owners. */
151 int disconnected; /* Non-zero if we need to reconnect cc or pc. */
152 int pending_commands; /* Number of commands sent waiting for a reply. */
153 redisAsyncContext *cc; /* Hiredis context for commands. */
154 redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
155 mstime_t cc_conn_time; /* cc connection time. */
156 mstime_t pc_conn_time; /* pc connection time. */
157 mstime_t pc_last_activity; /* Last time we received any message. */
158 mstime_t last_avail_time; /* Last time the instance replied to ping with
159 a reply we consider valid. */
160 mstime_t act_ping_time; /* Time at which the last pending ping (no pong
161 received after it) was sent. This field is
162 set to 0 when a pong is received, and set again
163 to the current time if the value is 0 and a new
164 ping is sent. */
165 mstime_t last_ping_time; /* Time at which we sent the last ping. This is
166 only used to avoid sending too many pings
167 during failure. Idle time is computed using
168 the act_ping_time field. */
169 mstime_t last_pong_time; /* Last time the instance replied to ping,
170 whatever the reply was. That's used to check
171 if the link is idle and must be reconnected. */
172 mstime_t last_reconn_time; /* Last reconnection attempt performed when
173 the link was down. */
174} instanceLink;
175
/* State kept for one monitored instance. The same structure is used for
 * masters, slaves and Sentinels; the SRI_MASTER / SRI_SLAVE / SRI_SENTINEL
 * bits in 'flags' tell which kind of instance it describes, and the
 * "Master specific" / "Slave specific" groups below are only meaningful for
 * the matching kind. Times are in milliseconds. */
176typedef struct sentinelRedisInstance {
177 int flags; /* See SRI_... defines */
178 char *name; /* Master name from the point of view of this sentinel. */
179 char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
180 uint64_t config_epoch; /* Configuration epoch. */
181 sentinelAddr *addr; /* Address of this instance (hostname, ip, port). */
182 instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
183 mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
184 mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
185 we received a hello from this Sentinel
186 via Pub/Sub. */
187 mstime_t last_master_down_reply_time; /* Time of last reply to
188 SENTINEL is-master-down command. */
189 mstime_t s_down_since_time; /* Subjectively down since time. */
190 mstime_t o_down_since_time; /* Objectively down since time. */
191 mstime_t down_after_period; /* Consider it down after that period. */
192 mstime_t info_refresh; /* Time at which we received INFO output from it. */
193 dict *renamed_commands; /* Commands renamed in this instance:
194 Sentinel will use the alternative commands
195 mapped on this table to send things like
196 SLAVEOF, CONFIG, INFO, ... */
197
198 /* Role and the first time we observed it.
199 * This is useful in order to delay replacing what the instance reports
200 * with our own configuration. We need to always wait some time in order
201 * to give a chance to the leader to report the new configuration before
202 * we do silly things. */
203 int role_reported;
204 mstime_t role_reported_time;
205 mstime_t slave_conf_change_time; /* Last time slave master addr changed. */
206
207 /* Master specific. */
208 dict *sentinels; /* Other sentinels monitoring the same master. */
209 dict *slaves; /* Slaves for this master instance. */
210 unsigned int quorum;/* Number of sentinels that need to agree on failure. */
211 int parallel_syncs; /* How many slaves to reconfigure at same time. */
212 char *auth_pass; /* Password to use for AUTH against master & replica. */
213 char *auth_user; /* Username for ACLs AUTH against master & replica. */
214
215 /* Slave specific. */
216 mstime_t master_link_down_time; /* Slave replication link down time. */
217 int slave_priority; /* Slave priority according to its INFO output. */
218 mstime_t slave_reconf_sent_time; /* Time at which we sent SLAVE OF <new> */
219 struct sentinelRedisInstance *master; /* Master instance if it's slave. */
220 char *slave_master_host; /* Master host as reported by INFO */
221 int slave_master_port; /* Master port as reported by INFO */
222 int slave_master_link_status; /* Master link status as reported by INFO */
223 unsigned long long slave_repl_offset; /* Slave replication offset. */
224 /* Failover */
225 char *leader; /* If this is a master instance, this is the runid of
226 the Sentinel that should perform the failover. If
227 this is a Sentinel, this is the runid of the Sentinel
228 that this Sentinel voted as leader. */
229 uint64_t leader_epoch; /* Epoch of the 'leader' field. */
230 uint64_t failover_epoch; /* Epoch of the currently started failover. */
231 int failover_state; /* See SENTINEL_FAILOVER_STATE_* defines. */
232 mstime_t failover_state_change_time;
233 mstime_t failover_start_time; /* Last failover attempt start time. */
234 mstime_t failover_timeout; /* Max time to refresh failover state. */
235 mstime_t failover_delay_logged; /* For what failover_start_time value we
236 logged the failover delay. */
237 struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
238 /* Scripts executed to notify admin or reconfigure clients: when they
239 * are set to NULL no script is executed. */
240 char *notification_script;
241 char *client_reconfig_script;
242 sds info; /* cached INFO output */
243} sentinelRedisInstance;
244
245/* Main state: the single global 'sentinel' object. Its fields are given
 * their initial values by initSentinel(). */
246struct sentinelState {
247 char myid[CONFIG_RUN_ID_SIZE40+1]; /* This sentinel ID. */
248 uint64_t current_epoch; /* Current epoch. */
249 dict *masters; /* Dictionary of master sentinelRedisInstances.
250 Key is the instance name, value is the
251 sentinelRedisInstance structure pointer. */
252 int tilt; /* Are we in TILT mode? */
253 int running_scripts; /* Number of scripts in execution right now. */
254 mstime_t tilt_start_time; /* When TILT started. */
255 mstime_t previous_time; /* Last time we ran the time handler. */
256 list *scripts_queue; /* Queue of user scripts to execute. */
257 char *announce_ip; /* IP addr that is gossiped to other sentinels if
258 not NULL. */
259 int announce_port; /* Port that is gossiped to other sentinels if
260 non zero. */
261 unsigned long simfailure_flags; /* Failures simulation. */
262 int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
263 paths at runtime? */
264 char *sentinel_auth_pass; /* Password to use for AUTH against other sentinel */
265 char *sentinel_auth_user; /* Username for ACLs AUTH against other sentinel. */
266 int resolve_hostnames; /* Support use of hostnames, assuming DNS is well configured. */
267 int announce_hostnames; /* Announce hostnames instead of IPs when we have them. */
268} sentinel;
269
270/* A script execution job.
 * NOTE(review): presumably the element type stored in sentinel.scripts_queue;
 * the queueing code is not visible here -- confirm. */
271typedef struct sentinelScriptJob {
272 int flags; /* Script job flags: SENTINEL_SCRIPT_* */
273 int retry_num; /* Number of times we tried to execute it. */
274 char **argv; /* Arguments to call the script. */
275 mstime_t start_time; /* Script execution time if the script is running,
276 otherwise 0 if we are allowed to retry the
277 execution at any time. If the script is not
278 running and it's not 0, it means: do not run
279 before the specified time. */
280 pid_t pid; /* Script execution pid. */
281} sentinelScriptJob;
282
283/* ======================= hiredis ae.c adapters =============================
284 * Note: this implementation is taken from hiredis/adapters/ae.h, however
285 * we have our modified copy for Sentinel in order to use our allocator
286 * and to have full control over how the adapter works. */
287
/* Adapter state tying one hiredis async context to the ae event loop.
 * An instance is allocated by redisAeAttach() and freed by redisAeCleanup(). */
288typedef struct redisAeEvents {
289 redisAsyncContext *context; /* Async context whose I/O this adapter drives. */
290 aeEventLoop *loop; /* Event loop the file events are registered on. */
291 int fd; /* Descriptor copied from the context at attach time. */
292 int reading, writing; /* Non-zero while the read / write handler is installed. */
293} redisAeEvents;
294
/* ae readable-event handler installed by redisAeAddRead().
 * 'privdata' is the redisAeEvents of the connection; the read is delegated
 * to hiredis. The 'el', 'fd' and 'mask' arguments are intentionally unused. */
295static void redisAeReadEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
296 ((void)el); ((void)fd); ((void)mask);
297
298 redisAeEvents *e = (redisAeEvents*)privdata;
299 redisAsyncHandleRead(e->context);
300}
301
/* ae writable-event handler installed by redisAeAddWrite().
 * 'privdata' is the redisAeEvents of the connection; the write is delegated
 * to hiredis. The 'el', 'fd' and 'mask' arguments are intentionally unused. */
302static void redisAeWriteEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
303 ((void)el); ((void)fd); ((void)mask);
304
305 redisAeEvents *e = (redisAeEvents*)privdata;
306 redisAsyncHandleWrite(e->context);
307}
308
/* hiredis "addRead" hook: start watching the connection for readability.
 * 'privdata' is the redisAeEvents set up by redisAeAttach(). The 'reading'
 * flag makes repeated calls a no-op, so the handler is registered once. */
309static void redisAeAddRead(void *privdata) {
310 redisAeEvents *e = (redisAeEvents*)privdata;
311 aeEventLoop *loop = e->loop;
312 if (!e->reading) {
313 e->reading = 1;
314 aeCreateFileEvent(loop,e->fd,AE_READABLE1,redisAeReadEvent,e);
315 }
316}
317
/* hiredis "delRead" hook: stop watching the connection for readability.
 * Does nothing when the read handler is not currently installed. */
318static void redisAeDelRead(void *privdata) {
319 redisAeEvents *e = (redisAeEvents*)privdata;
320 aeEventLoop *loop = e->loop;
321 if (e->reading) {
322 e->reading = 0;
323 aeDeleteFileEvent(loop,e->fd,AE_READABLE1);
324 }
325}
326
/* hiredis "addWrite" hook: start watching the connection for writability.
 * 'privdata' is the redisAeEvents set up by redisAeAttach(). The 'writing'
 * flag makes repeated calls a no-op, so the handler is registered once. */
327static void redisAeAddWrite(void *privdata) {
328 redisAeEvents *e = (redisAeEvents*)privdata;
329 aeEventLoop *loop = e->loop;
330 if (!e->writing) {
331 e->writing = 1;
332 aeCreateFileEvent(loop,e->fd,AE_WRITABLE2,redisAeWriteEvent,e);
333 }
334}
335
/* hiredis "delWrite" hook: stop watching the connection for writability.
 * Does nothing when the write handler is not currently installed. */
336static void redisAeDelWrite(void *privdata) {
337 redisAeEvents *e = (redisAeEvents*)privdata;
338 aeEventLoop *loop = e->loop;
339 if (e->writing) {
340 e->writing = 0;
341 aeDeleteFileEvent(loop,e->fd,AE_WRITABLE2);
342 }
343}
344
/* hiredis "cleanup" hook: unregister both file events (if installed) and
 * free the redisAeEvents container allocated by redisAeAttach(). 'privdata'
 * must not be used after this call. */
345static void redisAeCleanup(void *privdata) {
346 redisAeEvents *e = (redisAeEvents*)privdata;
347 redisAeDelRead(privdata);
348 redisAeDelWrite(privdata);
349 zfree(e);
350}
351
/* Attach the hiredis async context 'ac' to the ae event loop 'loop'.
 *
 * A redisAeEvents container is allocated with zmalloc() and stored in
 * ac->ev.data, and the add/del read/write and cleanup hooks of 'ac' are
 * pointed at the redisAe* adapters above. No file event is registered here:
 * that happens later, when hiredis invokes the addRead / addWrite hooks.
 *
 * Returns C_OK on success, or C_ERR (leaving 'ac' untouched) when the
 * context already has event data attached. */
352static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
353 redisContext *c = &(ac->c);
354 redisAeEvents *e;
355
356 /* Nothing should be attached when something is already attached */
357 if (ac->ev.data != NULL((void*)0))
358 return C_ERR-1;
359
360 /* Create container for context and r/w events */
361 e = (redisAeEvents*)zmalloc(sizeof(*e));
362 e->context = ac;
363 e->loop = loop;
364 e->fd = c->fd;
365 e->reading = e->writing = 0;
366
367 /* Register functions to start/stop listening for events */
368 ac->ev.addRead = redisAeAddRead;
369 ac->ev.delRead = redisAeDelRead;
370 ac->ev.addWrite = redisAeAddWrite;
371 ac->ev.delWrite = redisAeDelWrite;
372 ac->ev.cleanup = redisAeCleanup;
373 ac->ev.data = e;
374
375 return C_OK0;
376}
377
378/* ============================= Prototypes ================================= */
379
380void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status);
381void sentinelDisconnectCallback(const redisAsyncContext *c, int status);
382void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata);
383sentinelRedisInstance *sentinelGetMasterByName(char *name);
384char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
385char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
386int yesnotoi(char *s);
387void instanceLinkConnectionError(const redisAsyncContext *c);
388const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
389void sentinelAbortFailover(sentinelRedisInstance *ri);
390void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
391sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master);
392void sentinelScheduleScriptExecution(char *path, ...);
393void sentinelStartFailover(sentinelRedisInstance *master);
394void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata);
395int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr);
396char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
397void sentinelFlushConfig(void);
398void sentinelGenerateInitialMonitorEvents(void);
399int sentinelSendPing(sentinelRedisInstance *ri);
400int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master);
401sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
402void sentinelSimFailureCrash(void);
403
404/* ========================= Dictionary types =============================== */
405
406uint64_t dictSdsHash(const void *key);
407uint64_t dictSdsCaseHash(const void *key);
408int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
409int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
410void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
411
/* Dictionary value destructor for instance dictionaries: 'obj' is handed to
 * releaseSentinelRedisInstance(). 'privdata' is unused. */
412void dictInstancesValDestructor (void *privdata, void *obj) {
413 UNUSED(privdata)((void) privdata);
414 releaseSentinelRedisInstance(obj);
415}
416
417/* Instance name (sds) -> instance (sentinelRedisInstance pointer)
418 *
419 * also used for: sentinelRedisInstance->sentinels dictionary that maps
420 * sentinels ip:port to last seen time in Pub/Sub hello message.
 *
 * Values are released through dictInstancesValDestructor(); no key
 * destructor is installed, so the dictionary does not free its keys. */
421dictType instancesDictType = {
422 dictSdsHash, /* hash function */
423 NULL((void*)0), /* key dup */
424 NULL((void*)0), /* val dup */
425 dictSdsKeyCompare, /* key compare */
426 NULL((void*)0), /* key destructor */
427 dictInstancesValDestructor,/* val destructor */
428 NULL((void*)0) /* allow to expand */
429};
430
431/* Instance runid (sds) -> votes (long cast to void*)
432 *
433 * This is used by the sentinelGetObjectiveLeader() function in order to
434 * count the votes and understand who is the leader.
 *
 * Neither key nor value destructors are installed: the dictionary frees
 * nothing it stores. */
435dictType leaderVotesDictType = {
436 dictSdsHash, /* hash function */
437 NULL((void*)0), /* key dup */
438 NULL((void*)0), /* val dup */
439 dictSdsKeyCompare, /* key compare */
440 NULL((void*)0), /* key destructor */
441 NULL((void*)0), /* val destructor */
442 NULL((void*)0) /* allow to expand */
443};
444
445/* Instance renamed commands table.
 * Keys are hashed and compared with the "Case" variants of the sds
 * callbacks (presumably case-insensitive -- confirm in dictSdsCaseHash),
 * and both keys and values are released with dictSdsDestructor(). */
446dictType renamedCommandsDictType = {
447 dictSdsCaseHash, /* hash function */
448 NULL((void*)0), /* key dup */
449 NULL((void*)0), /* val dup */
450 dictSdsKeyCaseCompare, /* key compare */
451 dictSdsDestructor, /* key destructor */
452 dictSdsDestructor, /* val destructor */
453 NULL((void*)0) /* allow to expand */
454};
455
456/* =========================== Initialization =============================== */
457
458void sentinelCommand(client *c);
459void sentinelInfoCommand(client *c);
460void sentinelSetCommand(client *c);
461void sentinelPublishCommand(client *c);
462void sentinelRoleCommand(client *c);
463void sentinelConfigGetCommand(client *c);
464void sentinelConfigSetCommand(client *c);
465
/* Commands available in Sentinel mode. initSentinel() empties the regular
 * command tables and registers exactly these entries.
 * NOTE(review): the "acl" and "command" entries carry one more trailing
 * initializer than the entries above them -- confirm against the
 * struct redisCommand definition that this is intended. */
466struct redisCommand sentinelcmds[] = {
467 {"ping",pingCommand,1,"fast @connection",0,NULL((void*)0),0,0,0,0,0},
468 {"sentinel",sentinelCommand,-2,"admin",0,NULL((void*)0),0,0,0,0,0},
469 {"subscribe",subscribeCommand,-2,"pub-sub",0,NULL((void*)0),0,0,0,0,0},
470 {"unsubscribe",unsubscribeCommand,-1,"pub-sub",0,NULL((void*)0),0,0,0,0,0},
471 {"psubscribe",psubscribeCommand,-2,"pub-sub",0,NULL((void*)0),0,0,0,0,0},
472 {"punsubscribe",punsubscribeCommand,-1,"pub-sub",0,NULL((void*)0),0,0,0,0,0},
473 {"publish",sentinelPublishCommand,3,"pub-sub fast",0,NULL((void*)0),0,0,0,0,0},
474 {"info",sentinelInfoCommand,-1,"random @dangerous",0,NULL((void*)0),0,0,0,0,0},
475 {"role",sentinelRoleCommand,1,"fast read-only @dangerous",0,NULL((void*)0),0,0,0,0,0},
476 {"client",clientCommand,-2,"admin random @connection",0,NULL((void*)0),0,0,0,0,0},
477 {"shutdown",shutdownCommand,-1,"admin",0,NULL((void*)0),0,0,0,0,0},
478 {"auth",authCommand,-2,"no-auth fast @connection",0,NULL((void*)0),0,0,0,0,0},
479 {"hello",helloCommand,-1,"no-auth fast @connection",0,NULL((void*)0),0,0,0,0,0},
480 {"acl",aclCommand,-2,"admin",0,NULL((void*)0),0,0,0,0,0,0},
481 {"command",commandCommand,-1, "random @connection", 0,NULL((void*)0),0,0,0,0,0,0}
482};
483
484/* Names of the Sentinel config options that need to be loaded before the
485 * configuration of the monitored masters, to avoid dependency issues. */
486const char *preMonitorCfgName[] = {
487 "announce-ip",
488 "announce-port",
489 "deny-scripts-reconfig",
490 "sentinel-user",
491 "sentinel-pass",
492 "current-epoch",
493 "myid",
494 "resolve-hostnames",
495 "announce-hostnames"
496};
497
498/* This function overwrites a few normal Redis config defaults with Sentinel
499 * specific defaults: the listening port becomes REDIS_SENTINEL_PORT and
 * protected mode is turned off. */
500void initSentinelConfig(void) {
501 server.port = REDIS_SENTINEL_PORT26379;
502 server.protected_mode = 0; /* Sentinel must be exposed. */
503}
504
505void freeSentinelLoadQueueEntry(void *item);
506
507/* Perform the Sentinel mode initialization.
 *
 * Replaces the contents of server.commands and server.orig_commands with
 * the entries of sentinelcmds (assigning each its ACL command ID and parsed
 * flags), then resets every field of the global 'sentinel' state to its
 * startup value. Any failure while registering a command is fatal
 * (serverAssert / serverPanic). */
508void initSentinel(void) {
509 unsigned int j;
510
511 /* Empty the usual Redis command tables, then register only the
512 * commands listed in sentinelcmds. */
513 dictEmpty(server.commands,NULL((void*)0));
514 dictEmpty(server.orig_commands,NULL((void*)0));
515 ACLClearCommandID();
516 for (j = 0; j < sizeof(sentinelcmds)/sizeof(sentinelcmds[0]); j++) {
517 int retval;
518 struct redisCommand *cmd = sentinelcmds+j;
519 cmd->id = ACLGetCommandID(cmd->name); /* Assign the ID used for ACL. */
520 retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
521 serverAssert(retval == DICT_OK)((retval == 0)?(void)0 : (_serverAssert("retval == DICT_OK","sentinel.c"
,521),__builtin_unreachable()))
;
522 retval = dictAdd(server.orig_commands, sdsnew(cmd->name), cmd);
523 serverAssert(retval == DICT_OK)((retval == 0)?(void)0 : (_serverAssert("retval == DICT_OK","sentinel.c"
,523),__builtin_unreachable()))
;
524
525 /* Translate the command string flags description into an actual
526 * set of flags. */
527 if (populateCommandTableParseFlags(cmd,cmd->sflags) == C_ERR-1)
528 serverPanic("Unsupported command flag")_serverPanic("sentinel.c",528,"Unsupported command flag"),__builtin_unreachable
()
;
529 }
530
531 /* Initialize various data structures. */
532 sentinel.current_epoch = 0;
533 sentinel.masters = dictCreate(&instancesDictType,NULL((void*)0));
534 sentinel.tilt = 0;
535 sentinel.tilt_start_time = 0;
536 sentinel.previous_time = mstime();
537 sentinel.running_scripts = 0;
538 sentinel.scripts_queue = listCreate();
539 sentinel.announce_ip = NULL((void*)0);
540 sentinel.announce_port = 0;
541 sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE0;
542 sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG1;
543 sentinel.sentinel_auth_pass = NULL((void*)0);
544 sentinel.sentinel_auth_user = NULL((void*)0);
545 sentinel.resolve_hostnames = SENTINEL_DEFAULT_RESOLVE_HOSTNAMES0;
546 sentinel.announce_hostnames = SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES0;
 /* An all-zero myid means "no ID configured yet"; sentinelIsRunning()
  * checks for exactly that. */
547 memset(sentinel.myid,0,sizeof(sentinel.myid));
548 server.sentinel_config = NULL((void*)0);
549}
550
551/* This function gets called when the server is in Sentinel mode, started,
552 * loaded the configuration, and is ready for normal operations.
 *
 * The process exits with status 1 when no config file was given or when the
 * config file is not writable. Otherwise a Sentinel ID is generated and
 * flushed to the config if none is set, the ID is logged, and the initial
 * +monitor events are generated. */
553void sentinelIsRunning(void) {
554 int j;
555
556 if (server.configfile == NULL((void*)0)) {
557 serverLog(LL_WARNING3,
558 "Sentinel started without a config file. Exiting...");
559 exit(1);
560 } else if (access(server.configfile,W_OK2) == -1) {
561 serverLog(LL_WARNING3,
562 "Sentinel config file %s is not writable: %s. Exiting...",
563 server.configfile,strerror(errno(*__errno_location ())));
564 exit(1);
565 }
566
567 /* If this Sentinel has yet no ID set in the configuration file, we
568 * pick a random one and persist the config on disk. From now on this
569 * will be this Sentinel ID across restarts. */
570 for (j = 0; j < CONFIG_RUN_ID_SIZE40; j++)
571 if (sentinel.myid[j] != 0) break;
572
 /* The scan above ran to the end only if every byte of myid is zero. */
573 if (j == CONFIG_RUN_ID_SIZE40) {
574 /* Pick ID and persist the config. */
575 getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE40);
576 sentinelFlushConfig();
577 }
578
579 /* Log its ID to make debugging of issues simpler. */
580 serverLog(LL_WARNING3,"Sentinel ID is %s", sentinel.myid);
581
582 /* We want to generate a +monitor event for every configured master
583 * at startup. */
584 sentinelGenerateInitialMonitorEvents();
585}
586
587/* ============================== sentinelAddr ============================== */
588
589/* Create a sentinelAddr object and return it on success.
590 * On error NULL is returned and errno is set to:
591 * ENOENT: Can't resolve the hostname.
592 * EINVAL: Invalid port number (outside 0..65535).
 *
 * The port is validated before any resolution is attempted. 'hostname' is
 * resolved with ANET_IP_ONLY unless sentinel.resolve_hostnames is set. The
 * returned object holds its own sds copies of 'hostname' and of the resolved
 * ip; release it with releaseSentinelAddr().
593 */
594sentinelAddr *createSentinelAddr(char *hostname, int port) {
595 char ip[NET_IP_STR_LEN46];
596 sentinelAddr *sa;
597
598 if (port < 0 || port > 65535) {
599 errno(*__errno_location ()) = EINVAL22;
600 return NULL((void*)0);
601 }
602 if (anetResolve(NULL((void*)0),hostname,ip,sizeof(ip),
603 sentinel.resolve_hostnames ? ANET_NONE0 : ANET_IP_ONLY(1<<0)) == ANET_ERR-1) {
604 errno(*__errno_location ()) = ENOENT2;
605 return NULL((void*)0);
606 }
607 sa = zmalloc(sizeof(*sa));
608 sa->hostname = sdsnew(hostname);
609 sa->ip = sdsnew(ip);
610 sa->port = port;
611 return sa;
612}
613
614/* Return a duplicate of the source address.
 * The hostname and ip strings are copied as-is (nothing is resolved again),
 * so the copy is independent of 'src' and must be released separately. */
615sentinelAddr *dupSentinelAddr(sentinelAddr *src) {
616 sentinelAddr *sa;
617
618 sa = zmalloc(sizeof(*sa));
619 sa->hostname = sdsnew(src->hostname);
620 sa->ip = sdsnew(src->ip);
621 sa->port = src->port;
622 return sa;
623}
624
625/* Free a Sentinel address. Can't fail.
 * Releases both sds strings and then the structure itself; 'sa' is
 * dereferenced, so it must not be NULL. */
626void releaseSentinelAddr(sentinelAddr *sa) {
627 sdsfree(sa->hostname);
628 sdsfree(sa->ip);
629 zfree(sa);
630}
631
632/* Return non-zero if two addresses are equal.
 * Equality means same port and same resolved ip (compared ignoring case);
 * the hostname field does not take part in the comparison. */
633int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
634 return a->port == b->port && !strcasecmp(a->ip,b->ip);
635}
636
637/* Return non-zero if a hostname matches an address.
 * 'hostname' is resolved (with ANET_IP_ONLY unless
 * sentinel.resolve_hostnames is set) and the result is compared, ignoring
 * case, with a->ip. Returns 0 when the resolution fails. The port is not
 * considered. */
638int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) {
639 char ip[NET_IP_STR_LEN46];
640
641 /* We always resolve the hostname and compare it to the address */
642 if (anetResolve(NULL((void*)0), hostname, ip, sizeof(ip),
643 sentinel.resolve_hostnames ? ANET_NONE0 : ANET_IP_ONLY(1<<0)) == ANET_ERR-1)
644 return 0;
645 return !strcasecmp(a->ip, ip);
646}
647
/* Return the string used to announce address 'a': the hostname as specified
 * when sentinel.announce_hostnames is set, the resolved ip otherwise. The
 * returned pointer refers to a field of 'a'; the caller must not free it. */
648const char *announceSentinelAddr(const sentinelAddr *a) {
649 return sentinel.announce_hostnames ? a->hostname : a->ip;
650}
651
652/* Return an allocated sds with hostname/address:port. IPv6
653 * addresses are bracketed the same way anetFormatAddr() does.
 * An address is treated as IPv6 when it contains a ':' character. The
 * result is a new sds string owned by the caller.
654 */
655sds announceSentinelAddrAndPort(const sentinelAddr *a) {
656 const char *addr = announceSentinelAddr(a);
657 if (strchr(addr, ':') != NULL((void*)0))
658 return sdscatprintf(sdsempty(), "[%s]:%d", addr, a->port);
659 else
660 return sdscatprintf(sdsempty(), "%s:%d", addr, a->port);
661}
662
663/* =========================== Events notification ========================== */
664
665/* Send an event to log, pub/sub, user notification script.
666 *
667 * 'level' is the log level for logging. Only LL_WARNING events will trigger
668 * the execution of the user notification script.
669 *
670 * 'type' is the message type, also used as a pub/sub channel name.
671 *
672 * 'ri', is the redis instance target of this event if applicable, and is
673 * used to obtain the path of the notification script to execute.
674 *
675 * The remaining arguments are printf-alike.
676 * If the format specifier starts with the two characters "%@" then ri must
677 * not be NULL (it is dereferenced without a check), and the message is
 * prefixed with an instance identifier in the
678 * following format:
679 *
680 * <instance type> <instance name> <ip> <port>
681 *
682 * If the instance type is not master, then the additional string is
683 * added to specify the originating master:
684 *
685 * @ <master name> <master ip> <master port>
686 *
687 * Any other specifier after "%@" is processed by printf itself.
 *
 * The whole message is built in a LOG_MAX_LEN byte buffer; both formatting
 * steps are size-bounded, so longer messages are truncated.
688 */
689void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
690 const char *fmt, ...) {
691 va_list ap;
692 char msg[LOG_MAX_LEN1024];
693 robj *channel, *payload;
694
695 /* Handle %@ */
696 if (fmt[0] == '%' && fmt[1] == '@') {
697 sentinelRedisInstance *master = (ri->flags & SRI_MASTER(1<<0)) ?
698 NULL((void*)0) : ri->master;
699
700 if (master) {
701 snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d",
702 sentinelRedisInstanceTypeStr(ri),
703 ri->name, announceSentinelAddr(ri->addr), ri->addr->port,
704 master->name, announceSentinelAddr(master->addr), master->addr->port);
705 } else {
706 snprintf(msg, sizeof(msg), "%s %s %s %d",
707 sentinelRedisInstanceTypeStr(ri),
708 ri->name, announceSentinelAddr(ri->addr), ri->addr->port);
709 }
 /* Skip the "%@" marker: the rest is an ordinary printf format. */
710 fmt += 2;
711 } else {
712 msg[0] = '\0';
713 }
714
715 /* Use vsnprintf for the rest of the formatting if any, appending
 * after whatever prefix was written above. */
716 if (fmt[0] != '\0') {
717 va_start(ap, fmt)__builtin_va_start(ap, fmt);
718 vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), fmt, ap);
719 va_end(ap)__builtin_va_end(ap);
720 }
721
722 /* Log the message if the log level allows it to be logged. */
723 if (level >= server.verbosity)
724 serverLog(level,"%s %s",type,msg);
725
726 /* Publish the message via Pub/Sub if it's not a debugging one. */
727 if (level != LL_DEBUG0) {
728 channel = createStringObject(type,strlen(type));
729 payload = createStringObject(msg,strlen(msg));
730 pubsubPublishMessage(channel,payload);
731 decrRefCount(channel);
732 decrRefCount(payload);
733 }
734
735 /* Call the notification script if applicable: the script path is taken
 * from the master the instance belongs to (or from the instance itself
 * when it is a master). */
736 if (level == LL_WARNING3 && ri != NULL((void*)0)) {
737 sentinelRedisInstance *master = (ri->flags & SRI_MASTER(1<<0)) ?
738 ri : ri->master;
739 if (master && master->notification_script) {
740 sentinelScheduleScriptExecution(master->notification_script,
741 type,msg,NULL((void*)0));
742 }
743 }
744}
745
746/* This function is called only at startup and is used to generate a
747 * +monitor event for every configured master. The same events are also
748 * generated when a master to monitor is added at runtime via the
749 * SENTINEL MONITOR command. */
750void sentinelGenerateInitialMonitorEvents(void) {
751 dictIterator *di;
752 dictEntry *de;
753
754 di = dictGetIterator(sentinel.masters);
755 while((de = dictNext(di)) != NULL((void*)0)) {
756 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
757 sentinelEvent(LL_WARNING3,"+monitor",ri,"%@ quorum %d",ri->quorum);
758 }
759 dictReleaseIterator(di);
760}
761
762/* ============================ script execution ============================ */
763
764/* Release a script job structure and all the associated data. */
765void sentinelReleaseScriptJob(sentinelScriptJob *sj) {
766 int j = 0;
767
768 while(sj->argv[j]) sdsfree(sj->argv[j++]);
769 zfree(sj->argv);
770 zfree(sj);
771}
772
773#define SENTINEL_SCRIPT_MAX_ARGS16 16
774void sentinelScheduleScriptExecution(char *path, ...) {
775 va_list ap;
776 char *argv[SENTINEL_SCRIPT_MAX_ARGS16+1];
777 int argc = 1;
778 sentinelScriptJob *sj;
779
780 va_start(ap, path)__builtin_va_start(ap, path);
781 while(argc < SENTINEL_SCRIPT_MAX_ARGS16) {
782 argv[argc] = va_arg(ap,char*)__builtin_va_arg(ap, char*);
783 if (!argv[argc]) break;
784 argv[argc] = sdsnew(argv[argc]); /* Copy the string. */
785 argc++;
786 }
787 va_end(ap)__builtin_va_end(ap);
788 argv[0] = sdsnew(path);
789
790 sj = zmalloc(sizeof(*sj));
791 sj->flags = SENTINEL_SCRIPT_NONE0;
792 sj->retry_num = 0;
793 sj->argv = zmalloc(sizeof(char*)*(argc+1));
794 sj->start_time = 0;
795 sj->pid = 0;
796 memcpy(sj->argv,argv,sizeof(char*)*(argc+1));
797
798 listAddNodeTail(sentinel.scripts_queue,sj);
799
800 /* Remove the oldest non running script if we already hit the limit. */
801 if (listLength(sentinel.scripts_queue)((sentinel.scripts_queue)->len) > SENTINEL_SCRIPT_MAX_QUEUE256) {
802 listNode *ln;
803 listIter li;
804
805 listRewind(sentinel.scripts_queue,&li);
806 while ((ln = listNext(&li)) != NULL((void*)0)) {
807 sj = ln->value;
808
809 if (sj->flags & SENTINEL_SCRIPT_RUNNING1) continue;
810 /* The first node is the oldest as we add on tail. */
811 listDelNode(sentinel.scripts_queue,ln);
812 sentinelReleaseScriptJob(sj);
813 break;
814 }
815 serverAssert(listLength(sentinel.scripts_queue) <=((((sentinel.scripts_queue)->len) <= 256)?(void)0 : (_serverAssert
("listLength(sentinel.scripts_queue) <= SENTINEL_SCRIPT_MAX_QUEUE"
,"sentinel.c",816),__builtin_unreachable()))
816 SENTINEL_SCRIPT_MAX_QUEUE)((((sentinel.scripts_queue)->len) <= 256)?(void)0 : (_serverAssert
("listLength(sentinel.scripts_queue) <= SENTINEL_SCRIPT_MAX_QUEUE"
,"sentinel.c",816),__builtin_unreachable()))
;
817 }
818}
819
820/* Lookup a script in the scripts queue via pid, and returns the list node
821 * (so that we can easily remove it from the queue if needed). */
822listNode *sentinelGetScriptListNodeByPid(pid_t pid) {
823 listNode *ln;
824 listIter li;
825
826 listRewind(sentinel.scripts_queue,&li);
827 while ((ln = listNext(&li)) != NULL((void*)0)) {
828 sentinelScriptJob *sj = ln->value;
829
830 if ((sj->flags & SENTINEL_SCRIPT_RUNNING1) && sj->pid == pid)
831 return ln;
832 }
833 return NULL((void*)0);
834}
835
836/* Run pending scripts if we are not already at max number of running
837 * scripts. */
838void sentinelRunPendingScripts(void) {
839 listNode *ln;
840 listIter li;
841 mstime_t now = mstime();
842
843 /* Find jobs that are not running and run them, from the top to the
844 * tail of the queue, so we run older jobs first. */
845 listRewind(sentinel.scripts_queue,&li);
846 while (sentinel.running_scripts < SENTINEL_SCRIPT_MAX_RUNNING16 &&
847 (ln = listNext(&li)) != NULL((void*)0))
848 {
849 sentinelScriptJob *sj = ln->value;
850 pid_t pid;
851
852 /* Skip if already running. */
853 if (sj->flags & SENTINEL_SCRIPT_RUNNING1) continue;
854
855 /* Skip if it's a retry, but not enough time has elapsed. */
856 if (sj->start_time && sj->start_time > now) continue;
857
858 sj->flags |= SENTINEL_SCRIPT_RUNNING1;
859 sj->start_time = mstime();
860 sj->retry_num++;
861 pid = fork();
862
863 if (pid == -1) {
864 /* Parent (fork error).
865 * We report fork errors as signal 99, in order to unify the
866 * reporting with other kind of errors. */
867 sentinelEvent(LL_WARNING3,"-script-error",NULL((void*)0),
868 "%s %d %d", sj->argv[0], 99, 0);
869 sj->flags &= ~SENTINEL_SCRIPT_RUNNING1;
870 sj->pid = 0;
871 } else if (pid == 0) {
872 /* Child */
873 execve(sj->argv[0],sj->argv,environ);
874 /* If we are here an error occurred. */
875 _exit(2); /* Don't retry execution. */
876 } else {
877 sentinel.running_scripts++;
878 sj->pid = pid;
879 sentinelEvent(LL_DEBUG0,"+script-child",NULL((void*)0),"%ld",(long)pid);
880 }
881 }
882}
883
884/* How much to delay the execution of a script that we need to retry after
885 * an error?
886 *
887 * We double the retry delay for every further retry we do. So for instance
888 * if RETRY_DELAY is set to 30 seconds and the max number of retries is 10
889 * starting from the second attempt to execute the script the delays are:
890 * 30 sec, 60 sec, 2 min, 4 min, 8 min, 16 min, 32 min, 64 min, 128 min. */
891mstime_t sentinelScriptRetryDelay(int retry_num) {
892 mstime_t delay = SENTINEL_SCRIPT_RETRY_DELAY30000;
893
894 while (retry_num-- > 1) delay *= 2;
895 return delay;
896}
897
898/* Check for scripts that terminated, and remove them from the queue if the
899 * script terminated successfully. If instead the script was terminated by
900 * a signal, or returned exit code "1", it is scheduled to run again if
901 * the max number of retries did not already elapsed. */
902void sentinelCollectTerminatedScripts(void) {
903 int statloc;
904 pid_t pid;
905
906 while ((pid = wait3(&statloc,WNOHANG1,NULL((void*)0))) > 0) {
907 int exitcode = WEXITSTATUS(statloc)(((statloc) & 0xff00) >> 8);
908 int bysignal = 0;
909 listNode *ln;
910 sentinelScriptJob *sj;
911
912 if (WIFSIGNALED(statloc)(((signed char) (((statloc) & 0x7f) + 1) >> 1) >
0)
) bysignal = WTERMSIG(statloc)((statloc) & 0x7f);
913 sentinelEvent(LL_DEBUG0,"-script-child",NULL((void*)0),"%ld %d %d",
914 (long)pid, exitcode, bysignal);
915
916 ln = sentinelGetScriptListNodeByPid(pid);
917 if (ln == NULL((void*)0)) {
918 serverLog(LL_WARNING3,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
919 continue;
920 }
921 sj = ln->value;
922
923 /* If the script was terminated by a signal or returns an
924 * exit code of "1" (that means: please retry), we reschedule it
925 * if the max number of retries is not already reached. */
926 if ((bysignal || exitcode == 1) &&
927 sj->retry_num != SENTINEL_SCRIPT_MAX_RETRY10)
928 {
929 sj->flags &= ~SENTINEL_SCRIPT_RUNNING1;
930 sj->pid = 0;
931 sj->start_time = mstime() +
932 sentinelScriptRetryDelay(sj->retry_num);
933 } else {
934 /* Otherwise let's remove the script, but log the event if the
935 * execution did not terminated in the best of the ways. */
936 if (bysignal || exitcode != 0) {
937 sentinelEvent(LL_WARNING3,"-script-error",NULL((void*)0),
938 "%s %d %d", sj->argv[0], bysignal, exitcode);
939 }
940 listDelNode(sentinel.scripts_queue,ln);
941 sentinelReleaseScriptJob(sj);
942 }
943 sentinel.running_scripts--;
944 }
945}
946
947/* Kill scripts in timeout, they'll be collected by the
948 * sentinelCollectTerminatedScripts() function. */
949void sentinelKillTimedoutScripts(void) {
950 listNode *ln;
951 listIter li;
952 mstime_t now = mstime();
953
954 listRewind(sentinel.scripts_queue,&li);
955 while ((ln = listNext(&li)) != NULL((void*)0)) {
956 sentinelScriptJob *sj = ln->value;
957
958 if (sj->flags & SENTINEL_SCRIPT_RUNNING1 &&
959 (now - sj->start_time) > SENTINEL_SCRIPT_MAX_RUNTIME60000)
960 {
961 sentinelEvent(LL_WARNING3,"-script-timeout",NULL((void*)0),"%s %ld",
962 sj->argv[0], (long)sj->pid);
963 kill(sj->pid,SIGKILL9);
964 }
965 }
966}
967
968/* Implements SENTINEL PENDING-SCRIPTS command. */
969void sentinelPendingScriptsCommand(client *c) {
970 listNode *ln;
971 listIter li;
972
973 addReplyArrayLen(c,listLength(sentinel.scripts_queue)((sentinel.scripts_queue)->len));
974 listRewind(sentinel.scripts_queue,&li);
975 while ((ln = listNext(&li)) != NULL((void*)0)) {
976 sentinelScriptJob *sj = ln->value;
977 int j = 0;
978
979 addReplyMapLen(c,5);
980
981 addReplyBulkCString(c,"argv");
982 while (sj->argv[j]) j++;
983 addReplyArrayLen(c,j);
984 j = 0;
985 while (sj->argv[j]) addReplyBulkCString(c,sj->argv[j++]);
986
987 addReplyBulkCString(c,"flags");
988 addReplyBulkCString(c,
989 (sj->flags & SENTINEL_SCRIPT_RUNNING1) ? "running" : "scheduled");
990
991 addReplyBulkCString(c,"pid");
992 addReplyBulkLongLong(c,sj->pid);
993
994 if (sj->flags & SENTINEL_SCRIPT_RUNNING1) {
995 addReplyBulkCString(c,"run-time");
996 addReplyBulkLongLong(c,mstime() - sj->start_time);
997 } else {
998 mstime_t delay = sj->start_time ? (sj->start_time-mstime()) : 0;
999 if (delay < 0) delay = 0;
1000 addReplyBulkCString(c,"run-delay");
1001 addReplyBulkLongLong(c,delay);
1002 }
1003
1004 addReplyBulkCString(c,"retry-num");
1005 addReplyBulkLongLong(c,sj->retry_num);
1006 }
1007}
1008
1009/* This function calls, if any, the client reconfiguration script with the
1010 * following parameters:
1011 *
1012 * <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
1013 *
1014 * It is called every time a failover is performed.
1015 *
1016 * <state> is currently always "failover".
1017 * <role> is either "leader" or "observer".
1018 *
1019 * from/to fields are respectively master -> promoted slave addresses for
1020 * "start" and "end". */
1021void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, char *state, sentinelAddr *from, sentinelAddr *to) {
1022 char fromport[32], toport[32];
1023
1024 if (master->client_reconfig_script == NULL((void*)0)) return;
1025 ll2string(fromport,sizeof(fromport),from->port);
1026 ll2string(toport,sizeof(toport),to->port);
1027 sentinelScheduleScriptExecution(master->client_reconfig_script,
1028 master->name,
1029 (role == SENTINEL_LEADER(1<<17)) ? "leader" : "observer",
1030 state, announceSentinelAddr(from), fromport,
1031 announceSentinelAddr(to), toport, NULL((void*)0));
1032}
1033
1034/* =============================== instanceLink ============================= */
1035
1036/* Create a not yet connected link object. */
1037instanceLink *createInstanceLink(void) {
1038 instanceLink *link = zmalloc(sizeof(*link));
1039
1040 link->refcount = 1;
1041 link->disconnected = 1;
1042 link->pending_commands = 0;
1043 link->cc = NULL((void*)0);
1044 link->pc = NULL((void*)0);
1045 link->cc_conn_time = 0;
1046 link->pc_conn_time = 0;
1047 link->last_reconn_time = 0;
1048 link->pc_last_activity = 0;
1049 /* We set the act_ping_time to "now" even if we actually don't have yet
1050 * a connection with the node, nor we sent a ping.
1051 * This is useful to detect a timeout in case we'll not be able to connect
1052 * with the node at all. */
1053 link->act_ping_time = mstime();
1054 link->last_ping_time = 0;
1055 link->last_avail_time = mstime();
1056 link->last_pong_time = mstime();
1057 return link;
1058}
1059
1060/* Disconnect a hiredis connection in the context of an instance link. */
1061void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
1062 if (c == NULL((void*)0)) return;
1063
1064 if (link->cc == c) {
1065 link->cc = NULL((void*)0);
1066 link->pending_commands = 0;
1067 }
1068 if (link->pc == c) link->pc = NULL((void*)0);
1069 c->data = NULL((void*)0);
1070 link->disconnected = 1;
1071 redisAsyncFree(c);
1072}
1073
1074/* Decrement the refcount of a link object, if it drops to zero, actually
1075 * free it and return NULL. Otherwise don't do anything and return the pointer
1076 * to the object.
1077 *
1078 * If we are not going to free the link and ri is not NULL, we rebind all the
1079 * pending requests in link->cc (hiredis connection for commands) to a
1080 * callback that will just ignore them. This is useful to avoid processing
1081 * replies for an instance that no longer exists. */
1082instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
1083{
1084 serverAssert(link->refcount > 0)((link->refcount > 0)?(void)0 : (_serverAssert("link->refcount > 0"
,"sentinel.c",1084),__builtin_unreachable()))
;
1085 link->refcount--;
1086 if (link->refcount != 0) {
1087 if (ri && ri->link->cc) {
1088 /* This instance may have pending callbacks in the hiredis async
1089 * context, having as 'privdata' the instance that we are going to
1090 * free. Let's rewrite the callback list, directly exploiting
1091 * hiredis internal data structures, in order to bind them with
1092 * a callback that will ignore the reply at all. */
1093 redisCallback *cb;
1094 redisCallbackList *callbacks = &link->cc->replies;
1095
1096 cb = callbacks->head;
1097 while(cb) {
1098 if (cb->privdata == ri) {
1099 cb->fn = sentinelDiscardReplyCallback;
1100 cb->privdata = NULL((void*)0); /* Not strictly needed. */
1101 }
1102 cb = cb->next;
1103 }
1104 }
1105 return link; /* Other active users. */
1106 }
1107
1108 instanceLinkCloseConnection(link,link->cc);
1109 instanceLinkCloseConnection(link,link->pc);
1110 zfree(link);
1111 return NULL((void*)0);
1112}
1113
1114/* This function will attempt to share the instance link we already have
1115 * for the same Sentinel in the context of a different master, with the
1116 * instance we are passing as argument.
1117 *
1118 * This way multiple Sentinel objects that refer all to the same physical
1119 * Sentinel instance but in the context of different masters will use
1120 * a single connection, will send a single PING per second for failure
1121 * detection and so forth.
1122 *
1123 * Return C_OK if a matching Sentinel was found in the context of a
1124 * different master and sharing was performed. Otherwise C_ERR
1125 * is returned. */
1126int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
1127 serverAssert(ri->flags & SRI_SENTINEL)((ri->flags & (1<<2))?(void)0 : (_serverAssert("ri->flags & SRI_SENTINEL"
,"sentinel.c",1127),__builtin_unreachable()))
;
1128 dictIterator *di;
1129 dictEntry *de;
1130
1131 if (ri->runid == NULL((void*)0)) return C_ERR-1; /* No way to identify it. */
1132 if (ri->link->refcount > 1) return C_ERR-1; /* Already shared. */
1133
1134 di = dictGetIterator(sentinel.masters);
1135 while((de = dictNext(di)) != NULL((void*)0)) {
1136 sentinelRedisInstance *master = dictGetVal(de)((de)->v.val), *match;
1137 /* We want to share with the same physical Sentinel referenced
1138 * in other masters, so skip our master. */
1139 if (master == ri->master) continue;
1140 match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
1141 NULL((void*)0),0,ri->runid);
1142 if (match == NULL((void*)0)) continue; /* No match. */
1143 if (match == ri) continue; /* Should never happen but... safer. */
1144
1145 /* We identified a matching Sentinel, great! Let's free our link
1146 * and use the one of the matching Sentinel. */
1147 releaseInstanceLink(ri->link,NULL((void*)0));
1148 ri->link = match->link;
1149 match->link->refcount++;
1150 dictReleaseIterator(di);
1151 return C_OK0;
1152 }
1153 dictReleaseIterator(di);
1154 return C_ERR-1;
1155}
1156
1157/* Drop all connections to other sentinels. Returns the number of connections
1158 * dropped.*/
1159int sentinelDropConnections(void) {
1160 dictIterator *di;
1161 dictEntry *de;
1162 int dropped = 0;
1163
1164 di = dictGetIterator(sentinel.masters);
1165 while ((de = dictNext(di)) != NULL((void*)0)) {
1166 dictIterator *sdi;
1167 dictEntry *sde;
1168
1169 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1170 sdi = dictGetIterator(ri->sentinels);
1171 while ((sde = dictNext(sdi)) != NULL((void*)0)) {
1172 sentinelRedisInstance *si = dictGetVal(sde)((sde)->v.val);
1173 if (!si->link->disconnected) {
1174 instanceLinkCloseConnection(si->link, si->link->pc);
1175 instanceLinkCloseConnection(si->link, si->link->cc);
1176 dropped++;
1177 }
1178 }
1179 dictReleaseIterator(sdi);
1180 }
1181 dictReleaseIterator(di);
1182
1183 return dropped;
1184}
1185
1186/* When we detect a Sentinel to switch address (reporting a different IP/port
1187 * pair in Hello messages), let's update all the matching Sentinels in the
1188 * context of other masters as well and disconnect the links, so that everybody
1189 * will be updated.
1190 *
1191 * Return the number of updated Sentinel addresses. */
1192int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) {
1193 serverAssert(ri->flags & SRI_SENTINEL)((ri->flags & (1<<2))?(void)0 : (_serverAssert("ri->flags & SRI_SENTINEL"
,"sentinel.c",1193),__builtin_unreachable()))
;
1194 dictIterator *di;
1195 dictEntry *de;
1196 int reconfigured = 0;
1197
1198 di = dictGetIterator(sentinel.masters);
1199 while((de = dictNext(di)) != NULL((void*)0)) {
1200 sentinelRedisInstance *master = dictGetVal(de)((de)->v.val), *match;
1201 match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
1202 NULL((void*)0),0,ri->runid);
1203 /* If there is no match, this master does not know about this
1204 * Sentinel, try with the next one. */
1205 if (match == NULL((void*)0)) continue;
1206
1207 /* Disconnect the old links if connected. */
1208 if (match->link->cc != NULL((void*)0))
1209 instanceLinkCloseConnection(match->link,match->link->cc);
1210 if (match->link->pc != NULL((void*)0))
1211 instanceLinkCloseConnection(match->link,match->link->pc);
1212
1213 if (match == ri) continue; /* Address already updated for it. */
1214
1215 /* Update the address of the matching Sentinel by copying the address
1216 * of the Sentinel object that received the address update. */
1217 releaseSentinelAddr(match->addr);
1218 match->addr = dupSentinelAddr(ri->addr);
1219 reconfigured++;
1220 }
1221 dictReleaseIterator(di);
1222 if (reconfigured)
1223 sentinelEvent(LL_NOTICE2,"+sentinel-address-update", ri,
1224 "%@ %d additional matching instances", reconfigured);
1225 return reconfigured;
1226}
1227
1228/* This function is called when a hiredis connection reported an error.
1229 * We set it to NULL and mark the link as disconnected so that it will be
1230 * reconnected again.
1231 *
1232 * Note: we don't free the hiredis context as hiredis will do it for us
1233 * for async connections. */
1234void instanceLinkConnectionError(const redisAsyncContext *c) {
1235 instanceLink *link = c->data;
1236 int pubsub;
1237
1238 if (!link) return;
1239
1240 pubsub = (link->pc == c);
1241 if (pubsub)
1242 link->pc = NULL((void*)0);
1243 else
1244 link->cc = NULL((void*)0);
1245 link->disconnected = 1;
1246}
1247
1248/* Hiredis connection established / disconnected callbacks. We need them
1249 * just to cleanup our link state. */
1250void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
1251 if (status != C_OK0) instanceLinkConnectionError(c);
1252}
1253
1254void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
1255 UNUSED(status)((void) status);
1256 instanceLinkConnectionError(c);
1257}
1258
1259/* ========================== sentinelRedisInstance ========================= */
1260
1261/* Create a redis instance, the following fields must be populated by the
1262 * caller if needed:
1263 * runid: set to NULL but will be populated once INFO output is received.
1264 * info_refresh: is set to 0 to mean that we never received INFO so far.
1265 *
1266 * If SRI_MASTER is set into initial flags the instance is added to
1267 * sentinel.masters table.
1268 *
1269 * if SRI_SLAVE or SRI_SENTINEL is set then 'master' must be not NULL and the
1270 * instance is added into master->slaves or master->sentinels table.
1271 *
1272 * If the instance is a slave or sentinel, the name parameter is ignored and
1273 * is created automatically as hostname:port.
1274 *
1275 * The function fails if hostname can't be resolved or port is out of range.
1276 * When this happens NULL is returned and errno is set accordingly to the
1277 * createSentinelAddr() function.
1278 *
1279 * The function may also fail and return NULL with errno set to EBUSY if
1280 * a master with the same name, a slave with the same address, or a sentinel
1281 * with the same ID already exists. */
1282
1283sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
1284 sentinelRedisInstance *ri;
1285 sentinelAddr *addr;
1286 dict *table = NULL((void*)0);
1287 sds sdsname;
1288
1289 serverAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL))((flags & ((1<<0)|(1<<1)|(1<<2)))?(void
)0 : (_serverAssert("flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL)"
,"sentinel.c",1289),__builtin_unreachable()))
;
1290 serverAssert((flags & SRI_MASTER) || master != NULL)(((flags & (1<<0)) || master != ((void*)0))?(void)0
: (_serverAssert("(flags & SRI_MASTER) || master != NULL"
,"sentinel.c",1290),__builtin_unreachable()))
;
1291
1292 /* Check address validity. */
1293 addr = createSentinelAddr(hostname,port);
1294 if (addr == NULL((void*)0)) return NULL((void*)0);
1295
1296 /* For slaves use ip/host:port as name. */
1297 if (flags & SRI_SLAVE(1<<1))
1298 sdsname = announceSentinelAddrAndPort(addr);
1299 else
1300 sdsname = sdsnew(name);
1301
1302 /* Make sure the entry is not duplicated. This may happen when the same
1303 * name for a master is used multiple times inside the configuration or
1304 * if we try to add multiple times a slave or sentinel with same ip/port
1305 * to a master. */
1306 if (flags & SRI_MASTER(1<<0)) table = sentinel.masters;
1307 else if (flags & SRI_SLAVE(1<<1)) table = master->slaves;
1308 else if (flags & SRI_SENTINEL(1<<2)) table = master->sentinels;
1309 if (dictFind(table,sdsname)) {
1310 releaseSentinelAddr(addr);
1311 sdsfree(sdsname);
1312 errno(*__errno_location ()) = EBUSY16;
1313 return NULL((void*)0);
1314 }
1315
1316 /* Create the instance object. */
1317 ri = zmalloc(sizeof(*ri));
1318 /* Note that all the instances are started in the disconnected state,
1319 * the event loop will take care of connecting them. */
1320 ri->flags = flags;
1321 ri->name = sdsname;
1322 ri->runid = NULL((void*)0);
1323 ri->config_epoch = 0;
1324 ri->addr = addr;
1325 ri->link = createInstanceLink();
1326 ri->last_pub_time = mstime();
1327 ri->last_hello_time = mstime();
1328 ri->last_master_down_reply_time = mstime();
1329 ri->s_down_since_time = 0;
1330 ri->o_down_since_time = 0;
1331 ri->down_after_period = master ? master->down_after_period :
1332 SENTINEL_DEFAULT_DOWN_AFTER30000;
1333 ri->master_link_down_time = 0;
1334 ri->auth_pass = NULL((void*)0);
1335 ri->auth_user = NULL((void*)0);
1336 ri->slave_priority = SENTINEL_DEFAULT_SLAVE_PRIORITY100;
1337 ri->slave_reconf_sent_time = 0;
1338 ri->slave_master_host = NULL((void*)0);
1339 ri->slave_master_port = 0;
1340 ri->slave_master_link_status = SENTINEL_MASTER_LINK_STATUS_DOWN1;
1341 ri->slave_repl_offset = 0;
1342 ri->sentinels = dictCreate(&instancesDictType,NULL((void*)0));
1343 ri->quorum = quorum;
1344 ri->parallel_syncs = SENTINEL_DEFAULT_PARALLEL_SYNCS1;
1345 ri->master = master;
1346 ri->slaves = dictCreate(&instancesDictType,NULL((void*)0));
1347 ri->info_refresh = 0;
1348 ri->renamed_commands = dictCreate(&renamedCommandsDictType,NULL((void*)0));
1349
1350 /* Failover state. */
1351 ri->leader = NULL((void*)0);
1352 ri->leader_epoch = 0;
1353 ri->failover_epoch = 0;
1354 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE0;
1355 ri->failover_state_change_time = 0;
1356 ri->failover_start_time = 0;
1357 ri->failover_timeout = SENTINEL_DEFAULT_FAILOVER_TIMEOUT(60*3*1000);
1358 ri->failover_delay_logged = 0;
1359 ri->promoted_slave = NULL((void*)0);
1360 ri->notification_script = NULL((void*)0);
1361 ri->client_reconfig_script = NULL((void*)0);
1362 ri->info = NULL((void*)0);
1363
1364 /* Role */
1365 ri->role_reported = ri->flags & (SRI_MASTER(1<<0)|SRI_SLAVE(1<<1));
1366 ri->role_reported_time = mstime();
1367 ri->slave_conf_change_time = mstime();
1368
1369 /* Add into the right table. */
1370 dictAdd(table, ri->name, ri);
1371 return ri;
1372}
1373
1374/* Release this instance and all its slaves, sentinels, hiredis connections.
1375 * This function does not take care of unlinking the instance from the main
1376 * masters table (if it is a master) or from its master sentinels/slaves table
1377 * if it is a slave or sentinel. */
1378void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
1379 /* Release all its slaves or sentinels if any. */
1380 dictRelease(ri->sentinels);
1381 dictRelease(ri->slaves);
1382
1383 /* Disconnect the instance. */
1384 releaseInstanceLink(ri->link,ri);
1385
1386 /* Free other resources. */
1387 sdsfree(ri->name);
1388 sdsfree(ri->runid);
1389 sdsfree(ri->notification_script);
1390 sdsfree(ri->client_reconfig_script);
1391 sdsfree(ri->slave_master_host);
1392 sdsfree(ri->leader);
1393 sdsfree(ri->auth_pass);
1394 sdsfree(ri->auth_user);
1395 sdsfree(ri->info);
1396 releaseSentinelAddr(ri->addr);
1397 dictRelease(ri->renamed_commands);
1398
1399 /* Clear state into the master if needed. */
1400 if ((ri->flags & SRI_SLAVE(1<<1)) && (ri->flags & SRI_PROMOTED(1<<7)) && ri->master)
1401 ri->master->promoted_slave = NULL((void*)0);
1402
1403 zfree(ri);
1404}
1405
1406/* Lookup a slave in a master Redis instance, by ip and port. */
1407sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
1408 sentinelRedisInstance *ri, char *slave_addr, int port)
1409{
1410 sds key;
1411 sentinelRedisInstance *slave;
1412 sentinelAddr *addr;
1413
1414 serverAssert(ri->flags & SRI_MASTER)((ri->flags & (1<<0))?(void)0 : (_serverAssert("ri->flags & SRI_MASTER"
,"sentinel.c",1414),__builtin_unreachable()))
;
1415
1416 /* We need to handle a slave_addr that is potentially a hostname.
1417 * If that is the case, depending on configuration we either resolve
1418 * it and use the IP addres or fail.
1419 */
1420 addr = createSentinelAddr(slave_addr, port);
1421 if (!addr) return NULL((void*)0);
1422 key = announceSentinelAddrAndPort(addr);
1423 releaseSentinelAddr(addr);
1424
1425 slave = dictFetchValue(ri->slaves,key);
1426 sdsfree(key);
1427 return slave;
1428}
1429
1430/* Return the name of the type of the instance as a string. */
1431const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
1432 if (ri->flags & SRI_MASTER(1<<0)) return "master";
1433 else if (ri->flags & SRI_SLAVE(1<<1)) return "slave";
1434 else if (ri->flags & SRI_SENTINEL(1<<2)) return "sentinel";
1435 else return "unknown";
1436}
1437
1438/* This function remove the Sentinel with the specified ID from the
1439 * specified master.
1440 *
1441 * If "runid" is NULL the function returns ASAP.
1442 *
1443 * This function is useful because on Sentinels address switch, we want to
1444 * remove our old entry and add a new one for the same ID but with the new
1445 * address.
1446 *
1447 * The function returns 1 if the matching Sentinel was removed, otherwise
1448 * 0 if there was no Sentinel with this ID. */
1449int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) {
1450 dictIterator *di;
1451 dictEntry *de;
1452 int removed = 0;
1453
1454 if (runid == NULL((void*)0)) return 0;
1455
1456 di = dictGetSafeIterator(master->sentinels);
1457 while((de = dictNext(di)) != NULL((void*)0)) {
1458 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1459
1460 if (ri->runid && strcmp(ri->runid,runid) == 0) {
1461 dictDelete(master->sentinels,ri->name);
1462 removed++;
1463 }
1464 }
1465 dictReleaseIterator(di);
1466 return removed;
1467}
1468
1469/* Search an instance with the same runid, ip and port into a dictionary
1470 * of instances. Return NULL if not found, otherwise return the instance
1471 * pointer.
1472 *
1473 * runid or addr can be NULL. In such a case the search is performed only
1474 * by the non-NULL field. */
1475sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) {
1476 dictIterator *di;
1477 dictEntry *de;
1478 sentinelRedisInstance *instance = NULL((void*)0);
1479 sentinelAddr *ri_addr = NULL((void*)0);
1480
1481 serverAssert(addr || runid)((addr || runid)?(void)0 : (_serverAssert("addr || runid","sentinel.c"
,1481),__builtin_unreachable()))
; /* User must pass at least one search param. */
1482 if (addr != NULL((void*)0)) {
1483 /* Resolve addr, we use the IP as a key even if a hostname is used */
1484 ri_addr = createSentinelAddr(addr, port);
1485 if (!ri_addr) return NULL((void*)0);
1486 }
1487 di = dictGetIterator(instances);
1488 while((de = dictNext(di)) != NULL((void*)0)) {
1489 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1490
1491 if (runid && !ri->runid) continue;
1492 if ((runid == NULL((void*)0) || strcmp(ri->runid, runid) == 0) &&
1493 (addr == NULL((void*)0) || (strcmp(ri->addr->ip, ri_addr->ip) == 0 &&
1494 ri->addr->port == port)))
1495 {
1496 instance = ri;
1497 break;
1498 }
1499 }
1500 dictReleaseIterator(di);
1501 if (ri_addr != NULL((void*)0))
1502 releaseSentinelAddr(ri_addr);
1503
1504 return instance;
1505}
1506
1507/* Master lookup by name */
1508sentinelRedisInstance *sentinelGetMasterByName(char *name) {
1509 sentinelRedisInstance *ri;
1510 sds sdsname = sdsnew(name);
1511
1512 ri = dictFetchValue(sentinel.masters,sdsname);
1513 sdsfree(sdsname);
1514 return ri;
1515}
1516
1517/* Add the specified flags to all the instances in the specified dictionary. */
1518void sentinelAddFlagsToDictOfRedisInstances(dict *instances, int flags) {
1519 dictIterator *di;
1520 dictEntry *de;
1521
1522 di = dictGetIterator(instances);
1523 while((de = dictNext(di)) != NULL((void*)0)) {
1524 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1525 ri->flags |= flags;
1526 }
1527 dictReleaseIterator(di);
1528}
1529
1530/* Remove the specified flags to all the instances in the specified
1531 * dictionary. */
1532void sentinelDelFlagsToDictOfRedisInstances(dict *instances, int flags) {
1533 dictIterator *di;
1534 dictEntry *de;
1535
1536 di = dictGetIterator(instances);
1537 while((de = dictNext(di)) != NULL((void*)0)) {
1538 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1539 ri->flags &= ~flags;
1540 }
1541 dictReleaseIterator(di);
1542}
1543
1544/* Reset the state of a monitored master:
1545 * 1) Remove all slaves.
1546 * 2) Remove all sentinels.
1547 * 3) Remove most of the flags resulting from runtime operations.
1548 * 4) Reset timers to their default value. For example after a reset it will be
1549 * possible to failover again the same master ASAP, without waiting the
1550 * failover timeout delay.
1551 * 5) In the process of doing this undo the failover if in progress.
1552 * 6) Disconnect the connections with the master (will reconnect automatically).
1553 */
1554
1555#define SENTINEL_RESET_NO_SENTINELS(1<<0) (1<<0)
1556void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
1557 serverAssert(ri->flags & SRI_MASTER)((ri->flags & (1<<0))?(void)0 : (_serverAssert("ri->flags & SRI_MASTER"
,"sentinel.c",1557),__builtin_unreachable()))
;
1558 dictRelease(ri->slaves);
1559 ri->slaves = dictCreate(&instancesDictType,NULL((void*)0));
1560 if (!(flags & SENTINEL_RESET_NO_SENTINELS(1<<0))) {
1561 dictRelease(ri->sentinels);
1562 ri->sentinels = dictCreate(&instancesDictType,NULL((void*)0));
1563 }
1564 instanceLinkCloseConnection(ri->link,ri->link->cc);
1565 instanceLinkCloseConnection(ri->link,ri->link->pc);
1566 ri->flags &= SRI_MASTER(1<<0);
1567 if (ri->leader) {
1568 sdsfree(ri->leader);
1569 ri->leader = NULL((void*)0);
1570 }
1571 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE0;
1572 ri->failover_state_change_time = 0;
1573 ri->failover_start_time = 0; /* We can failover again ASAP. */
1574 ri->promoted_slave = NULL((void*)0);
1575 sdsfree(ri->runid);
1576 sdsfree(ri->slave_master_host);
1577 ri->runid = NULL((void*)0);
1578 ri->slave_master_host = NULL((void*)0);
1579 ri->link->act_ping_time = mstime();
1580 ri->link->last_ping_time = 0;
1581 ri->link->last_avail_time = mstime();
1582 ri->link->last_pong_time = mstime();
1583 ri->role_reported_time = mstime();
1584 ri->role_reported = SRI_MASTER(1<<0);
1585 if (flags & SENTINEL_GENERATE_EVENT(1<<16))
1586 sentinelEvent(LL_WARNING3,"+reset-master",ri,"%@");
1587}
1588
1589/* Call sentinelResetMaster() on every master with a name matching the specified
1590 * pattern. */
1591int sentinelResetMastersByPattern(char *pattern, int flags) {
1592 dictIterator *di;
1593 dictEntry *de;
1594 int reset = 0;
1595
1596 di = dictGetIterator(sentinel.masters);
1597 while((de = dictNext(di)) != NULL((void*)0)) {
1598 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1599
1600 if (ri->name) {
1601 if (stringmatch(pattern,ri->name,0)) {
1602 sentinelResetMaster(ri,flags);
1603 reset++;
1604 }
1605 }
1606 }
1607 dictReleaseIterator(di);
1608 return reset;
1609}
1610
1611/* Reset the specified master with sentinelResetMaster(), and also change
1612 * the ip:port address, but take the name of the instance unmodified.
1613 *
1614 * This is used to handle the +switch-master event.
1615 *
1616 * The function returns C_ERR if the address can't be resolved for some
1617 * reason. Otherwise C_OK is returned. */
1618int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hostname, int port) {
1619 sentinelAddr *oldaddr, *newaddr;
1620 sentinelAddr **slaves = NULL((void*)0);
1621 int numslaves = 0, j;
1622 dictIterator *di;
1623 dictEntry *de;
1624
1625 newaddr = createSentinelAddr(hostname,port);
1626 if (newaddr == NULL((void*)0)) return C_ERR-1;
1627
1628 /* There can be only 0 or 1 slave that has the newaddr.
1629 * and It can add old master 1 more slave.
1630 * so It allocates dictSize(master->slaves) + 1 */
1631 slaves = zmalloc(sizeof(sentinelAddr*)*(dictSize(master->slaves)((master->slaves)->ht[0].used+(master->slaves)->ht
[1].used)
+ 1));
1632
1633 /* Don't include the one having the address we are switching to. */
1634 di = dictGetIterator(master->slaves);
1635 while((de = dictNext(di)) != NULL((void*)0)) {
1636 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
1637
1638 if (sentinelAddrIsEqual(slave->addr,newaddr)) continue;
1639 slaves[numslaves++] = dupSentinelAddr(slave->addr);
1640 }
1641 dictReleaseIterator(di);
1642
1643 /* If we are switching to a different address, include the old address
1644 * as a slave as well, so that we'll be able to sense / reconfigure
1645 * the old master. */
1646 if (!sentinelAddrIsEqual(newaddr,master->addr)) {
1647 slaves[numslaves++] = dupSentinelAddr(master->addr);
1648 }
1649
1650 /* Reset and switch address. */
1651 sentinelResetMaster(master,SENTINEL_RESET_NO_SENTINELS(1<<0));
1652 oldaddr = master->addr;
1653 master->addr = newaddr;
1654 master->o_down_since_time = 0;
1655 master->s_down_since_time = 0;
1656
1657 /* Add slaves back. */
1658 for (j = 0; j < numslaves; j++) {
1659 sentinelRedisInstance *slave;
1660
1661 slave = createSentinelRedisInstance(NULL((void*)0),SRI_SLAVE(1<<1),slaves[j]->hostname,
1662 slaves[j]->port, master->quorum, master);
1663 releaseSentinelAddr(slaves[j]);
1664 if (slave) sentinelEvent(LL_NOTICE2,"+slave",slave,"%@");
1665 }
1666 zfree(slaves);
1667
1668 /* Release the old address at the end so we are safe even if the function
1669 * gets the master->addr->ip and master->addr->port as arguments. */
1670 releaseSentinelAddr(oldaddr);
1671 sentinelFlushConfig();
1672 return C_OK0;
1673}
1674
1675/* Return non-zero if there was no SDOWN or ODOWN error associated to this
1676 * instance in the latest 'ms' milliseconds. */
1677int sentinelRedisInstanceNoDownFor(sentinelRedisInstance *ri, mstime_t ms) {
1678 mstime_t most_recent;
1679
1680 most_recent = ri->s_down_since_time;
1681 if (ri->o_down_since_time > most_recent)
1682 most_recent = ri->o_down_since_time;
1683 return most_recent == 0 || (mstime() - most_recent) > ms;
1684}
1685
1686/* Return the current master address, that is, its address or the address
1687 * of the promoted slave if already operational. */
1688sentinelAddr *sentinelGetCurrentMasterAddress(sentinelRedisInstance *master) {
1689 /* If we are failing over the master, and the state is already
1690 * SENTINEL_FAILOVER_STATE_RECONF_SLAVES or greater, it means that we
1691 * already have the new configuration epoch in the master, and the
1692 * slave acknowledged the configuration switch. Advertise the new
1693 * address. */
1694 if ((master->flags & SRI_FAILOVER_IN_PROGRESS(1<<6)) &&
1695 master->promoted_slave &&
1696 master->failover_state >= SENTINEL_FAILOVER_STATE_RECONF_SLAVES5)
1697 {
1698 return master->promoted_slave->addr;
1699 } else {
1700 return master->addr;
1701 }
1702}
1703
1704/* This function sets the down_after_period field value in 'master' to all
1705 * the slaves and sentinel instances connected to this master. */
1706void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *master) {
1707 dictIterator *di;
1708 dictEntry *de;
1709 int j;
1710 dict *d[] = {master->slaves, master->sentinels, NULL((void*)0)};
1711
1712 for (j = 0; d[j]; j++) {
1713 di = dictGetIterator(d[j]);
1714 while((de = dictNext(di)) != NULL((void*)0)) {
1715 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
1716 ri->down_after_period = master->down_after_period;
1717 }
1718 dictReleaseIterator(di);
1719 }
1720}
1721
1722char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
1723 if (ri->flags & SRI_MASTER(1<<0)) return "master";
1724 else if (ri->flags & SRI_SLAVE(1<<1)) return "slave";
1725 else if (ri->flags & SRI_SENTINEL(1<<2)) return "sentinel";
1726 else return "unknown";
1727}
1728
1729/* This function is used in order to send commands to Redis instances: the
1730 * commands we send from Sentinel may be renamed, a common case is a master
1731 * with CONFIG and SLAVEOF commands renamed for security concerns. In that
1732 * case we check the ri->renamed_command table (or if the instance is a slave,
1733 * we check the one of the master), and map the command that we should send
1734 * to the set of renamed commads. However, if the command was not renamed,
1735 * we just return "command" itself. */
1736char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
1737 sds sc = sdsnew(command);
1738 if (ri->master) ri = ri->master;
1739 char *retval = dictFetchValue(ri->renamed_commands, sc);
1740 sdsfree(sc);
1741 return retval ? retval : command;
1742}
1743
1744/* ============================ Config handling ============================= */
1745
1746/* Generalise handling create instance error. Use SRI_MASTER, SRI_SLAVE or
1747 * SRI_SENTINEL as a role value. */
1748const char *sentinelCheckCreateInstanceErrors(int role) {
1749 switch(errno(*__errno_location ())) {
1750 case EBUSY16:
1751 switch (role) {
1752 case SRI_MASTER(1<<0):
1753 return "Duplicate master name.";
1754 case SRI_SLAVE(1<<1):
1755 return "Duplicate hostname and port for replica.";
1756 case SRI_SENTINEL(1<<2):
1757 return "Duplicate runid for sentinel.";
1758 default:
1759 serverAssert(0)((0)?(void)0 : (_serverAssert("0","sentinel.c",1759),__builtin_unreachable
()))
;
1760 break;
1761 }
1762 break;
1763 case ENOENT2:
1764 return "Can't resolve instance hostname.";
1765 case EINVAL22:
1766 return "Invalid port number.";
1767 default:
1768 return "Unknown Error for creating instances.";
1769 }
1770}
1771
1772/* init function for server.sentinel_config */
1773void initializeSentinelConfig() {
1774 server.sentinel_config = zmalloc(sizeof(struct sentinelConfig));
1775 server.sentinel_config->monitor_cfg = listCreate();
1776 server.sentinel_config->pre_monitor_cfg = listCreate();
1777 server.sentinel_config->post_monitor_cfg = listCreate();
1778 listSetFreeMethod(server.sentinel_config->monitor_cfg,freeSentinelLoadQueueEntry)((server.sentinel_config->monitor_cfg)->free = (freeSentinelLoadQueueEntry
))
;
1779 listSetFreeMethod(server.sentinel_config->pre_monitor_cfg,freeSentinelLoadQueueEntry)((server.sentinel_config->pre_monitor_cfg)->free = (freeSentinelLoadQueueEntry
))
;
1780 listSetFreeMethod(server.sentinel_config->post_monitor_cfg,freeSentinelLoadQueueEntry)((server.sentinel_config->post_monitor_cfg)->free = (freeSentinelLoadQueueEntry
))
;
1781}
1782
1783/* destroy function for server.sentinel_config */
1784void freeSentinelConfig() {
1785 /* release these three config queues since we will not use it anymore */
1786 listRelease(server.sentinel_config->pre_monitor_cfg);
1787 listRelease(server.sentinel_config->monitor_cfg);
1788 listRelease(server.sentinel_config->post_monitor_cfg);
1789 zfree(server.sentinel_config);
1790 server.sentinel_config = NULL((void*)0);
1791}
1792
1793/* Search config name in pre monitor config name array, return 1 if found,
1794 * 0 if not found. */
1795int searchPreMonitorCfgName(const char *name) {
1796 for (unsigned int i = 0; i < sizeof(preMonitorCfgName)/sizeof(preMonitorCfgName[0]); i++) {
1797 if (!strcasecmp(preMonitorCfgName[i],name)) return 1;
1798 }
1799 return 0;
1800}
1801
1802/* free method for sentinelLoadQueueEntry when release the list */
1803void freeSentinelLoadQueueEntry(void *item) {
1804 struct sentinelLoadQueueEntry *entry = item;
1805 sdsfreesplitres(entry->argv,entry->argc);
1806 sdsfree(entry->line);
1807 zfree(entry);
1808}
1809
1810/* This function is used for queuing sentinel configuration, the main
1811 * purpose of this function is to delay parsing the sentinel config option
1812 * in order to avoid the order dependent issue from the config. */
1813void queueSentinelConfig(sds *argv, int argc, int linenum, sds line) {
1814 int i;
1815 struct sentinelLoadQueueEntry *entry;
1816
1817 /* initialize sentinel_config for the first call */
1818 if (server.sentinel_config == NULL((void*)0)) initializeSentinelConfig();
1819
1820 entry = zmalloc(sizeof(struct sentinelLoadQueueEntry));
1821 entry->argv = zmalloc(sizeof(char*)*argc);
1822 entry->argc = argc;
1823 entry->linenum = linenum;
1824 entry->line = sdsdup(line);
1825 for (i = 0; i < argc; i++) {
1826 entry->argv[i] = sdsdup(argv[i]);
1827 }
1828 /* Separate config lines with pre monitor config, monitor config and
1829 * post monitor config, in order to parsing config dependencies
1830 * correctly. */
1831 if (!strcasecmp(argv[0],"monitor")) {
1832 listAddNodeTail(server.sentinel_config->monitor_cfg,entry);
1833 } else if (searchPreMonitorCfgName(argv[0])) {
1834 listAddNodeTail(server.sentinel_config->pre_monitor_cfg,entry);
1835 } else{
1836 listAddNodeTail(server.sentinel_config->post_monitor_cfg,entry);
1837 }
1838}
1839
1840/* This function is used for loading the sentinel configuration from
1841 * pre_monitor_cfg, monitor_cfg and post_monitor_cfg list */
1842void loadSentinelConfigFromQueue(void) {
1843 const char *err = NULL((void*)0);
1844 listIter li;
1845 listNode *ln;
1846 int linenum = 0;
1847 sds line = NULL((void*)0);
1848
1849 /* if there is no sentinel_config entry, we can return immediately */
1850 if (server.sentinel_config == NULL((void*)0)) return;
1851
1852 /* loading from pre monitor config queue first to avoid dependency issues */
1853 listRewind(server.sentinel_config->pre_monitor_cfg,&li);
1854 while((ln = listNext(&li))) {
1855 struct sentinelLoadQueueEntry *entry = ln->value;
1856 err = sentinelHandleConfiguration(entry->argv,entry->argc);
1857 if (err) {
1858 linenum = entry->linenum;
1859 line = entry->line;
1860 goto loaderr;
1861 }
1862 }
1863
1864 /* loading from monitor config queue */
1865 listRewind(server.sentinel_config->monitor_cfg,&li);
1866 while((ln = listNext(&li))) {
1867 struct sentinelLoadQueueEntry *entry = ln->value;
1868 err = sentinelHandleConfiguration(entry->argv,entry->argc);
1869 if (err) {
1870 linenum = entry->linenum;
1871 line = entry->line;
1872 goto loaderr;
1873 }
1874 }
1875
1876 /* loading from the post monitor config queue */
1877 listRewind(server.sentinel_config->post_monitor_cfg,&li);
1878 while((ln = listNext(&li))) {
1879 struct sentinelLoadQueueEntry *entry = ln->value;
1880 err = sentinelHandleConfiguration(entry->argv,entry->argc);
1881 if (err) {
1882 linenum = entry->linenum;
1883 line = entry->line;
1884 goto loaderr;
1885 }
1886 }
1887
1888 /* free sentinel_config when config loading is finished */
1889 freeSentinelConfig();
1890 return;
1891
1892loaderr:
1893 fprintf(stderrstderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n",
1894 REDIS_VERSION"6.2.1");
1895 fprintf(stderrstderr, "Reading the configuration file, at line %d\n", linenum);
1896 fprintf(stderrstderr, ">>> '%s'\n", line);
1897 fprintf(stderrstderr, "%s\n", err);
1898 exit(1);
1899}
1900
1901const char *sentinelHandleConfiguration(char **argv, int argc) {
1902
1903 sentinelRedisInstance *ri;
1904
1905 if (!strcasecmp(argv[0],"monitor") && argc == 5) {
1906 /* monitor <name> <host> <port> <quorum> */
1907 int quorum = atoi(argv[4]);
1908
1909 if (quorum <= 0) return "Quorum must be 1 or greater.";
1910 if (createSentinelRedisInstance(argv[1],SRI_MASTER(1<<0),argv[2],
1911 atoi(argv[3]),quorum,NULL((void*)0)) == NULL((void*)0))
1912 {
1913 return sentinelCheckCreateInstanceErrors(SRI_MASTER(1<<0));
1914 }
1915 } else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) {
1916 /* down-after-milliseconds <name> <milliseconds> */
1917 ri = sentinelGetMasterByName(argv[1]);
1918 if (!ri) return "No such master with specified name.";
1919 ri->down_after_period = atoi(argv[2]);
1920 if (ri->down_after_period <= 0)
1921 return "negative or zero time parameter.";
1922 sentinelPropagateDownAfterPeriod(ri);
1923 } else if (!strcasecmp(argv[0],"failover-timeout") && argc == 3) {
1924 /* failover-timeout <name> <milliseconds> */
1925 ri = sentinelGetMasterByName(argv[1]);
1926 if (!ri) return "No such master with specified name.";
1927 ri->failover_timeout = atoi(argv[2]);
1928 if (ri->failover_timeout <= 0)
1929 return "negative or zero time parameter.";
1930 } else if (!strcasecmp(argv[0],"parallel-syncs") && argc == 3) {
1931 /* parallel-syncs <name> <milliseconds> */
1932 ri = sentinelGetMasterByName(argv[1]);
1933 if (!ri) return "No such master with specified name.";
1934 ri->parallel_syncs = atoi(argv[2]);
1935 } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
1936 /* notification-script <name> <path> */
1937 ri = sentinelGetMasterByName(argv[1]);
1938 if (!ri) return "No such master with specified name.";
1939 if (access(argv[2],X_OK1) == -1)
1940 return "Notification script seems non existing or non executable.";
1941 ri->notification_script = sdsnew(argv[2]);
1942 } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
1943 /* client-reconfig-script <name> <path> */
1944 ri = sentinelGetMasterByName(argv[1]);
1945 if (!ri) return "No such master with specified name.";
1946 if (access(argv[2],X_OK1) == -1)
1947 return "Client reconfiguration script seems non existing or "
1948 "non executable.";
1949 ri->client_reconfig_script = sdsnew(argv[2]);
1950 } else if (!strcasecmp(argv[0],"auth-pass") && argc == 3) {
1951 /* auth-pass <name> <password> */
1952 ri = sentinelGetMasterByName(argv[1]);
1953 if (!ri) return "No such master with specified name.";
1954 ri->auth_pass = sdsnew(argv[2]);
1955 } else if (!strcasecmp(argv[0],"auth-user") && argc == 3) {
1956 /* auth-user <name> <username> */
1957 ri = sentinelGetMasterByName(argv[1]);
1958 if (!ri) return "No such master with specified name.";
1959 ri->auth_user = sdsnew(argv[2]);
1960 } else if (!strcasecmp(argv[0],"current-epoch") && argc == 2) {
1961 /* current-epoch <epoch> */
1962 unsigned long long current_epoch = strtoull(argv[1],NULL((void*)0),10);
1963 if (current_epoch > sentinel.current_epoch)
1964 sentinel.current_epoch = current_epoch;
1965 } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
1966 if (strlen(argv[1]) != CONFIG_RUN_ID_SIZE40)
1967 return "Malformed Sentinel id in myid option.";
1968 memcpy(sentinel.myid,argv[1],CONFIG_RUN_ID_SIZE40);
1969 } else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
1970 /* config-epoch <name> <epoch> */
1971 ri = sentinelGetMasterByName(argv[1]);
1972 if (!ri) return "No such master with specified name.";
1973 ri->config_epoch = strtoull(argv[2],NULL((void*)0),10);
1974 /* The following update of current_epoch is not really useful as
1975 * now the current epoch is persisted on the config file, but
1976 * we leave this check here for redundancy. */
1977 if (ri->config_epoch > sentinel.current_epoch)
1978 sentinel.current_epoch = ri->config_epoch;
1979 } else if (!strcasecmp(argv[0],"leader-epoch") && argc == 3) {
1980 /* leader-epoch <name> <epoch> */
1981 ri = sentinelGetMasterByName(argv[1]);
1982 if (!ri) return "No such master with specified name.";
1983 ri->leader_epoch = strtoull(argv[2],NULL((void*)0),10);
1984 } else if ((!strcasecmp(argv[0],"known-slave") ||
1985 !strcasecmp(argv[0],"known-replica")) && argc == 4)
1986 {
1987 sentinelRedisInstance *slave;
1988
1989 /* known-replica <name> <ip> <port> */
1990 ri = sentinelGetMasterByName(argv[1]);
1991 if (!ri) return "No such master with specified name.";
1992 if ((slave = createSentinelRedisInstance(NULL((void*)0),SRI_SLAVE(1<<1),argv[2],
Although the value stored to 'slave' is used in the enclosing expression, the value is never actually read from 'slave'
1993 atoi(argv[3]), ri->quorum, ri)) == NULL((void*)0))
1994 {
1995 return sentinelCheckCreateInstanceErrors(SRI_SLAVE(1<<1));
1996 }
1997 } else if (!strcasecmp(argv[0],"known-sentinel") &&
1998 (argc == 4 || argc == 5)) {
1999 sentinelRedisInstance *si;
2000
2001 if (argc == 5) { /* Ignore the old form without runid. */
2002 /* known-sentinel <name> <ip> <port> [runid] */
2003 ri = sentinelGetMasterByName(argv[1]);
2004 if (!ri) return "No such master with specified name.";
2005 if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL(1<<2),argv[2],
2006 atoi(argv[3]), ri->quorum, ri)) == NULL((void*)0))
2007 {
2008 return sentinelCheckCreateInstanceErrors(SRI_SENTINEL(1<<2));
2009 }
2010 si->runid = sdsnew(argv[4]);
2011 sentinelTryConnectionSharing(si);
2012 }
2013 } else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
2014 /* rename-command <name> <command> <renamed-command> */
2015 ri = sentinelGetMasterByName(argv[1]);
2016 if (!ri) return "No such master with specified name.";
2017 sds oldcmd = sdsnew(argv[2]);
2018 sds newcmd = sdsnew(argv[3]);
2019 if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK0) {
2020 sdsfree(oldcmd);
2021 sdsfree(newcmd);
2022 return "Same command renamed multiple times with rename-command.";
2023 }
2024 } else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
2025 /* announce-ip <ip-address> */
2026 if (strlen(argv[1]))
2027 sentinel.announce_ip = sdsnew(argv[1]);
2028 } else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
2029 /* announce-port <port> */
2030 sentinel.announce_port = atoi(argv[1]);
2031 } else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
2032 /* deny-scripts-reconfig <yes|no> */
2033 if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
2034 return "Please specify yes or no for the "
2035 "deny-scripts-reconfig options.";
2036 }
2037 } else if (!strcasecmp(argv[0],"sentinel-user") && argc == 2) {
2038 /* sentinel-user <user-name> */
2039 if (strlen(argv[1]))
2040 sentinel.sentinel_auth_user = sdsnew(argv[1]);
2041 } else if (!strcasecmp(argv[0],"sentinel-pass") && argc == 2) {
2042 /* sentinel-pass <password> */
2043 if (strlen(argv[1]))
2044 sentinel.sentinel_auth_pass = sdsnew(argv[1]);
2045 } else if (!strcasecmp(argv[0],"resolve-hostnames") && argc == 2) {
2046 /* resolve-hostnames <yes|no> */
2047 if ((sentinel.resolve_hostnames = yesnotoi(argv[1])) == -1) {
2048 return "Please specify yes or not for the resolve-hostnames option.";
2049 }
2050 } else if (!strcasecmp(argv[0],"announce-hostnames") && argc == 2) {
2051 /* announce-hostnames <yes|no> */
2052 if ((sentinel.announce_hostnames = yesnotoi(argv[1])) == -1) {
2053 return "Please specify yes or not for the announce-hostnames option.";
2054 }
2055 } else {
2056 return "Unrecognized sentinel configuration statement.";
2057 }
2058 return NULL((void*)0);
2059}
2060
2061/* Implements CONFIG REWRITE for "sentinel" option.
2062 * This is used not just to rewrite the configuration given by the user
2063 * (the configured masters) but also in order to retain the state of
2064 * Sentinel across restarts: config epoch of masters, associated slaves
2065 * and sentinel instances, and so forth. */
2066void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
2067 dictIterator *di, *di2;
2068 dictEntry *de;
2069 sds line;
2070
2071 /* sentinel unique ID. */
2072 line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
2073 rewriteConfigRewriteLine(state,"sentinel myid",line,1);
2074
2075 /* sentinel deny-scripts-reconfig. */
2076 line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
2077 sentinel.deny_scripts_reconfig ? "yes" : "no");
2078 rewriteConfigRewriteLine(state,"sentinel deny-scripts-reconfig",line,
2079 sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG1);
2080
2081 /* sentinel resolve-hostnames.
2082 * This must be included early in the file so it is already in effect
2083 * when reading the file.
2084 */
2085 line = sdscatprintf(sdsempty(), "sentinel resolve-hostnames %s",
2086 sentinel.resolve_hostnames ? "yes" : "no");
2087 rewriteConfigRewriteLine(state,"sentinel resolve-hostnames",line,
2088 sentinel.resolve_hostnames != SENTINEL_DEFAULT_RESOLVE_HOSTNAMES0);
2089
2090 /* sentinel announce-hostnames. */
2091 line = sdscatprintf(sdsempty(), "sentinel announce-hostnames %s",
2092 sentinel.announce_hostnames ? "yes" : "no");
2093 rewriteConfigRewriteLine(state,"sentinel announce-hostnames",line,
2094 sentinel.announce_hostnames != SENTINEL_DEFAULT_ANNOUNCE_HOSTNAMES0);
2095
2096 /* For every master emit a "sentinel monitor" config entry. */
2097 di = dictGetIterator(sentinel.masters);
2098 while((de = dictNext(di)) != NULL((void*)0)) {
2099 sentinelRedisInstance *master, *ri;
2100 sentinelAddr *master_addr;
2101
2102 /* sentinel monitor */
2103 master = dictGetVal(de)((de)->v.val);
2104 master_addr = sentinelGetCurrentMasterAddress(master);
2105 line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d",
2106 master->name, announceSentinelAddr(master_addr), master_addr->port,
2107 master->quorum);
2108 rewriteConfigRewriteLine(state,"sentinel monitor",line,1);
2109 /* rewriteConfigMarkAsProcessed is handled after the loop */
2110
2111 /* sentinel down-after-milliseconds */
2112 if (master->down_after_period != SENTINEL_DEFAULT_DOWN_AFTER30000) {
2113 line = sdscatprintf(sdsempty(),
2114 "sentinel down-after-milliseconds %s %ld",
2115 master->name, (long) master->down_after_period);
2116 rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1);
2117 /* rewriteConfigMarkAsProcessed is handled after the loop */
2118 }
2119
2120 /* sentinel failover-timeout */
2121 if (master->failover_timeout != SENTINEL_DEFAULT_FAILOVER_TIMEOUT(60*3*1000)) {
2122 line = sdscatprintf(sdsempty(),
2123 "sentinel failover-timeout %s %ld",
2124 master->name, (long) master->failover_timeout);
2125 rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1);
2126 /* rewriteConfigMarkAsProcessed is handled after the loop */
2127
2128 }
2129
2130 /* sentinel parallel-syncs */
2131 if (master->parallel_syncs != SENTINEL_DEFAULT_PARALLEL_SYNCS1) {
2132 line = sdscatprintf(sdsempty(),
2133 "sentinel parallel-syncs %s %d",
2134 master->name, master->parallel_syncs);
2135 rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1);
2136 /* rewriteConfigMarkAsProcessed is handled after the loop */
2137 }
2138
2139 /* sentinel notification-script */
2140 if (master->notification_script) {
2141 line = sdscatprintf(sdsempty(),
2142 "sentinel notification-script %s %s",
2143 master->name, master->notification_script);
2144 rewriteConfigRewriteLine(state,"sentinel notification-script",line,1);
2145 /* rewriteConfigMarkAsProcessed is handled after the loop */
2146 }
2147
2148 /* sentinel client-reconfig-script */
2149 if (master->client_reconfig_script) {
2150 line = sdscatprintf(sdsempty(),
2151 "sentinel client-reconfig-script %s %s",
2152 master->name, master->client_reconfig_script);
2153 rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1);
2154 /* rewriteConfigMarkAsProcessed is handled after the loop */
2155 }
2156
2157 /* sentinel auth-pass & auth-user */
2158 if (master->auth_pass) {
2159 line = sdscatprintf(sdsempty(),
2160 "sentinel auth-pass %s %s",
2161 master->name, master->auth_pass);
2162 rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1);
2163 /* rewriteConfigMarkAsProcessed is handled after the loop */
2164 }
2165
2166 if (master->auth_user) {
2167 line = sdscatprintf(sdsempty(),
2168 "sentinel auth-user %s %s",
2169 master->name, master->auth_user);
2170 rewriteConfigRewriteLine(state,"sentinel auth-user",line,1);
2171 /* rewriteConfigMarkAsProcessed is handled after the loop */
2172 }
2173
2174 /* sentinel config-epoch */
2175 line = sdscatprintf(sdsempty(),
2176 "sentinel config-epoch %s %llu",
2177 master->name, (unsigned long long) master->config_epoch);
2178 rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1);
2179 /* rewriteConfigMarkAsProcessed is handled after the loop */
2180
2181
2182 /* sentinel leader-epoch */
2183 line = sdscatprintf(sdsempty(),
2184 "sentinel leader-epoch %s %llu",
2185 master->name, (unsigned long long) master->leader_epoch);
2186 rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1);
2187 /* rewriteConfigMarkAsProcessed is handled after the loop */
2188
2189 /* sentinel known-slave */
2190 di2 = dictGetIterator(master->slaves);
2191 while((de = dictNext(di2)) != NULL((void*)0)) {
2192 sentinelAddr *slave_addr;
2193
2194 ri = dictGetVal(de)((de)->v.val);
2195 slave_addr = ri->addr;
2196
2197 /* If master_addr (obtained using sentinelGetCurrentMasterAddress()
2198 * so it may be the address of the promoted slave) is equal to this
2199 * slave's address, a failover is in progress and the slave was
2200 * already successfully promoted. So as the address of this slave
2201 * we use the old master address instead. */
2202 if (sentinelAddrIsEqual(slave_addr,master_addr))
2203 slave_addr = master->addr;
2204 line = sdscatprintf(sdsempty(),
2205 "sentinel known-replica %s %s %d",
2206 master->name, announceSentinelAddr(slave_addr), slave_addr->port);
2207 rewriteConfigRewriteLine(state,"sentinel known-replica",line,1);
2208 /* rewriteConfigMarkAsProcessed is handled after the loop */
2209 }
2210 dictReleaseIterator(di2);
2211
2212 /* sentinel known-sentinel */
2213 di2 = dictGetIterator(master->sentinels);
2214 while((de = dictNext(di2)) != NULL((void*)0)) {
2215 ri = dictGetVal(de)((de)->v.val);
2216 if (ri->runid == NULL((void*)0)) continue;
2217 line = sdscatprintf(sdsempty(),
2218 "sentinel known-sentinel %s %s %d %s",
2219 master->name, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid);
2220 rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1);
2221 /* rewriteConfigMarkAsProcessed is handled after the loop */
2222 }
2223 dictReleaseIterator(di2);
2224
2225 /* sentinel rename-command */
2226 di2 = dictGetIterator(master->renamed_commands);
2227 while((de = dictNext(di2)) != NULL((void*)0)) {
2228 sds oldname = dictGetKey(de)((de)->key);
2229 sds newname = dictGetVal(de)((de)->v.val);
2230 line = sdscatprintf(sdsempty(),
2231 "sentinel rename-command %s %s %s",
2232 master->name, oldname, newname);
2233 rewriteConfigRewriteLine(state,"sentinel rename-command",line,1);
2234 /* rewriteConfigMarkAsProcessed is handled after the loop */
2235 }
2236 dictReleaseIterator(di2);
2237 }
2238
2239 /* sentinel current-epoch is a global state valid for all the masters. */
2240 line = sdscatprintf(sdsempty(),
2241 "sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch);
2242 rewriteConfigRewriteLine(state,"sentinel current-epoch",line,1);
2243
2244 /* sentinel announce-ip. */
2245 if (sentinel.announce_ip) {
2246 line = sdsnew("sentinel announce-ip ");
2247 line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip));
2248 rewriteConfigRewriteLine(state,"sentinel announce-ip",line,1);
2249 } else {
2250 rewriteConfigMarkAsProcessed(state,"sentinel announce-ip");
2251 }
2252
2253 /* sentinel announce-port. */
2254 if (sentinel.announce_port) {
2255 line = sdscatprintf(sdsempty(),"sentinel announce-port %d",
2256 sentinel.announce_port);
2257 rewriteConfigRewriteLine(state,"sentinel announce-port",line,1);
2258 } else {
2259 rewriteConfigMarkAsProcessed(state,"sentinel announce-port");
2260 }
2261
2262 /* sentinel sentinel-user. */
2263 if (sentinel.sentinel_auth_user) {
2264 line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user);
2265 rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1);
2266 } else {
2267 rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user");
2268 }
2269
2270 /* sentinel sentinel-pass. */
2271 if (sentinel.sentinel_auth_pass) {
2272 line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass);
2273 rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1);
2274 } else {
2275 rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass");
2276 }
2277
2278 dictReleaseIterator(di);
2279
2280 /* NOTE: the purpose here is in case due to the state change, the config rewrite
2281 does not handle the configs, however, previously the config was set in the config file,
2282 rewriteConfigMarkAsProcessed should be put here to mark it as processed in order to
2283 delete the old config entry.
2284 */
2285 rewriteConfigMarkAsProcessed(state,"sentinel monitor");
2286 rewriteConfigMarkAsProcessed(state,"sentinel down-after-milliseconds");
2287 rewriteConfigMarkAsProcessed(state,"sentinel failover-timeout");
2288 rewriteConfigMarkAsProcessed(state,"sentinel parallel-syncs");
2289 rewriteConfigMarkAsProcessed(state,"sentinel notification-script");
2290 rewriteConfigMarkAsProcessed(state,"sentinel client-reconfig-script");
2291 rewriteConfigMarkAsProcessed(state,"sentinel auth-pass");
2292 rewriteConfigMarkAsProcessed(state,"sentinel auth-user");
2293 rewriteConfigMarkAsProcessed(state,"sentinel config-epoch");
2294 rewriteConfigMarkAsProcessed(state,"sentinel leader-epoch");
2295 rewriteConfigMarkAsProcessed(state,"sentinel known-replica");
2296 rewriteConfigMarkAsProcessed(state,"sentinel known-sentinel");
2297 rewriteConfigMarkAsProcessed(state,"sentinel rename-command");
2298}
2299
2300/* This function uses the config rewriting Redis engine in order to persist
2301 * the state of the Sentinel in the current configuration file.
2302 *
2303 * Before returning the function calls fsync() against the generated
2304 * configuration file to make sure changes are committed to disk.
2305 *
2306 * On failure the function logs a warning on the Redis log. */
2307void sentinelFlushConfig(void) {
2308 int fd = -1;
2309 int saved_hz = server.hz;
2310 int rewrite_status;
2311
2312 server.hz = CONFIG_DEFAULT_HZ10;
2313 rewrite_status = rewriteConfig(server.configfile, 0);
2314 server.hz = saved_hz;
2315
2316 if (rewrite_status == -1) goto werr;
2317 if ((fd = open(server.configfile,O_RDONLY00)) == -1) goto werr;
2318 if (fsync(fd) == -1) goto werr;
2319 if (close(fd) == EOF(-1)) goto werr;
2320 return;
2321
2322werr:
2323 if (fd != -1) close(fd);
2324 serverLog(LL_WARNING3,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno(*__errno_location ())));
2325}
2326
2327/* ====================== hiredis connection handling ======================= */
2328
2329/* Send the AUTH command with the specified master password if needed.
2330 * Note that for slaves the password set for the master is used.
2331 *
2332 * In case this Sentinel requires a password as well, via the "requirepass"
2333 * configuration directive, we assume we should use the local password in
2334 * order to authenticate when connecting with the other Sentinels as well.
2335 * So basically all the Sentinels share the same password and use it to
2336 * authenticate reciprocally.
2337 *
2338 * We don't check at all if the command was successfully transmitted
2339 * to the instance as if it fails Sentinel will detect the instance down,
2340 * will disconnect and reconnect the link and so forth. */
2341void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
2342 char *auth_pass = NULL((void*)0);
2343 char *auth_user = NULL((void*)0);
2344
2345 if (ri->flags & SRI_MASTER(1<<0)) {
2346 auth_pass = ri->auth_pass;
2347 auth_user = ri->auth_user;
2348 } else if (ri->flags & SRI_SLAVE(1<<1)) {
2349 auth_pass = ri->master->auth_pass;
2350 auth_user = ri->master->auth_user;
2351 } else if (ri->flags & SRI_SENTINEL(1<<2)) {
2352 /* If sentinel_auth_user is NULL, AUTH will use default user
2353 with sentinel_auth_pass to authenticate */
2354 if (sentinel.sentinel_auth_pass) {
2355 auth_pass = sentinel.sentinel_auth_pass;
2356 auth_user = sentinel.sentinel_auth_user;
2357 } else {
2358 /* Compatibility with old configs. requirepass is used
2359 * for both incoming and outgoing authentication. */
2360 auth_pass = server.requirepass;
2361 auth_user = NULL((void*)0);
2362 }
2363 }
2364
2365 if (auth_pass && auth_user == NULL((void*)0)) {
2366 if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s",
2367 sentinelInstanceMapCommand(ri,"AUTH"),
2368 auth_pass) == C_OK0) ri->link->pending_commands++;
2369 } else if (auth_pass && auth_user) {
2370 /* If we also have an username, use the ACL-style AUTH command
2371 * with two arguments, username and password. */
2372 if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s %s",
2373 sentinelInstanceMapCommand(ri,"AUTH"),
2374 auth_user, auth_pass) == C_OK0) ri->link->pending_commands++;
2375 }
2376}
2377
2378/* Use CLIENT SETNAME to name the connection in the Redis instance as
2379 * sentinel-<first_8_chars_of_runid>-<connection_type>
2380 * The connection type is "cmd" or "pubsub" as specified by 'type'.
2381 *
2382 * This makes it possible to list all the sentinel instances connected
2383 * to a Redis server with CLIENT LIST, grepping for a specific name format. */
2384void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
2385 char name[64];
2386
2387 snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
2388 if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
2389 "%s SETNAME %s",
2390 sentinelInstanceMapCommand(ri,"CLIENT"),
2391 name) == C_OK0)
2392 {
2393 ri->link->pending_commands++;
2394 }
2395}
2396
2397static int instanceLinkNegotiateTLS(redisAsyncContext *context) {
2398#ifndef USE_OPENSSL
2399 (void) context;
2400#else
2401 if (!redis_tls_ctx) return C_ERR-1;
2402 SSL *ssl = SSL_new(redis_tls_client_ctx ? redis_tls_client_ctx : redis_tls_ctx);
2403 if (!ssl) return C_ERR-1;
2404
2405 if (redisInitiateSSL(&context->c, ssl) == REDIS_ERR-1) return C_ERR-1;
2406#endif
2407 return C_OK0;
2408}
2409
2410/* Create the async connections for the instance link if the link
2411 * is disconnected. Note that link->disconnected is true even if just
2412 * one of the two links (commands and pub/sub) is missing. */
2413void sentinelReconnectInstance(sentinelRedisInstance *ri) {
2414 if (ri->link->disconnected == 0) return;
2415 if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
2416 instanceLink *link = ri->link;
2417 mstime_t now = mstime();
2418
2419 if (now - ri->link->last_reconn_time < SENTINEL_PING_PERIOD1000) return;
2420 ri->link->last_reconn_time = now;
2421
2422 /* Commands connection. */
2423 if (link->cc == NULL((void*)0)) {
2424 link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR(server.bindaddr_count ? server.bindaddr[0] : ((void*)0)));
2425 if (!link->cc->err) anetCloexec(link->cc->c.fd);
2426 if (!link->cc->err && server.tls_replication &&
2427 (instanceLinkNegotiateTLS(link->cc) == C_ERR-1)) {
2428 sentinelEvent(LL_DEBUG0,"-cmd-link-reconnection",ri,"%@ #Failed to initialize TLS");
2429 instanceLinkCloseConnection(link,link->cc);
2430 } else if (link->cc->err) {
2431 sentinelEvent(LL_DEBUG0,"-cmd-link-reconnection",ri,"%@ #%s",
2432 link->cc->errstr);
2433 instanceLinkCloseConnection(link,link->cc);
2434 } else {
2435 link->pending_commands = 0;
2436 link->cc_conn_time = mstime();
2437 link->cc->data = link;
2438 redisAeAttach(server.el,link->cc);
2439 redisAsyncSetConnectCallback(link->cc,
2440 sentinelLinkEstablishedCallback);
2441 redisAsyncSetDisconnectCallback(link->cc,
2442 sentinelDisconnectCallback);
2443 sentinelSendAuthIfNeeded(ri,link->cc);
2444 sentinelSetClientName(ri,link->cc,"cmd");
2445
2446 /* Send a PING ASAP when reconnecting. */
2447 sentinelSendPing(ri);
2448 }
2449 }
2450 /* Pub / Sub */
2451 if ((ri->flags & (SRI_MASTER(1<<0)|SRI_SLAVE(1<<1))) && link->pc == NULL((void*)0)) {
2452 link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR(server.bindaddr_count ? server.bindaddr[0] : ((void*)0)));
2453 if (!link->pc->err) anetCloexec(link->pc->c.fd);
2454 if (!link->pc->err && server.tls_replication &&
2455 (instanceLinkNegotiateTLS(link->pc) == C_ERR-1)) {
2456 sentinelEvent(LL_DEBUG0,"-pubsub-link-reconnection",ri,"%@ #Failed to initialize TLS");
2457 } else if (link->pc->err) {
2458 sentinelEvent(LL_DEBUG0,"-pubsub-link-reconnection",ri,"%@ #%s",
2459 link->pc->errstr);
2460 instanceLinkCloseConnection(link,link->pc);
2461 } else {
2462 int retval;
2463 link->pc_conn_time = mstime();
2464 link->pc->data = link;
2465 redisAeAttach(server.el,link->pc);
2466 redisAsyncSetConnectCallback(link->pc,
2467 sentinelLinkEstablishedCallback);
2468 redisAsyncSetDisconnectCallback(link->pc,
2469 sentinelDisconnectCallback);
2470 sentinelSendAuthIfNeeded(ri,link->pc);
2471 sentinelSetClientName(ri,link->pc,"pubsub");
2472 /* Now we subscribe to the Sentinels "Hello" channel. */
2473 retval = redisAsyncCommand(link->pc,
2474 sentinelReceiveHelloMessages, ri, "%s %s",
2475 sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
2476 SENTINEL_HELLO_CHANNEL"__sentinel__:hello");
2477 if (retval != C_OK0) {
2478 /* If we can't subscribe, the Pub/Sub connection is useless
2479 * and we can simply disconnect it and try again. */
2480 instanceLinkCloseConnection(link,link->pc);
2481 return;
2482 }
2483 }
2484 }
2485 /* Clear the disconnected status only if we have both the connections
2486 * (or just the commands connection if this is a sentinel instance). */
2487 if (link->cc && (ri->flags & SRI_SENTINEL(1<<2) || link->pc))
2488 link->disconnected = 0;
2489}
2490
2491/* ======================== Redis instances pinging ======================== */
2492
2493/* Return true if master looks "sane", that is:
2494 * 1) It is actually a master in the current configuration.
2495 * 2) It reports itself as a master.
2496 * 3) It is not SDOWN or ODOWN.
2497 * 4) We obtained last INFO no more than two times the INFO period time ago. */
2498int sentinelMasterLooksSane(sentinelRedisInstance *master) {
2499 return
2500 master->flags & SRI_MASTER(1<<0) &&
2501 master->role_reported == SRI_MASTER(1<<0) &&
2502 (master->flags & (SRI_S_DOWN(1<<3)|SRI_O_DOWN(1<<4))) == 0 &&
2503 (mstime() - master->info_refresh) < SENTINEL_INFO_PERIOD10000*2;
2504}
2505
2506/* Process the INFO output from masters. */
2507void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
2508 sds *lines;
2509 int numlines, j;
2510 int role = 0;
2511
2512 /* cache full INFO output for instance */
2513 sdsfree(ri->info);
2514 ri->info = sdsnew(info);
2515
2516 /* The following fields must be reset to a given value in the case they
2517 * are not found at all in the INFO output. */
2518 ri->master_link_down_time = 0;
2519
2520 /* Process line by line. */
2521 lines = sdssplitlen(info,strlen(info),"\r\n",2,&numlines);
2522 for (j = 0; j < numlines; j++) {
2523 sentinelRedisInstance *slave;
2524 sds l = lines[j];
2525
2526 /* run_id:<40 hex chars>*/
2527 if (sdslen(l) >= 47 && !memcmp(l,"run_id:",7)) {
2528 if (ri->runid == NULL((void*)0)) {
2529 ri->runid = sdsnewlen(l+7,40);
2530 } else {
2531 if (strncmp(ri->runid,l+7,40) != 0) {
2532 sentinelEvent(LL_NOTICE2,"+reboot",ri,"%@");
2533 sdsfree(ri->runid);
2534 ri->runid = sdsnewlen(l+7,40);
2535 }
2536 }
2537 }
2538
2539 /* old versions: slave0:<ip>,<port>,<state>
2540 * new versions: slave0:ip=127.0.0.1,port=9999,... */
2541 if ((ri->flags & SRI_MASTER(1<<0)) &&
2542 sdslen(l) >= 7 &&
2543 !memcmp(l,"slave",5) && isdigit(l[5])((*__ctype_b_loc ())[(int) ((l[5]))] & (unsigned short int
) _ISdigit)
)
2544 {
2545 char *ip, *port, *end;
2546
2547 if (strstr(l,"ip=") == NULL((void*)0)) {
2548 /* Old format. */
2549 ip = strchr(l,':'); if (!ip) continue;
2550 ip++; /* Now ip points to start of ip address. */
2551 port = strchr(ip,','); if (!port) continue;
2552 *port = '\0'; /* nul term for easy access. */
2553 port++; /* Now port points to start of port number. */
2554 end = strchr(port,','); if (!end) continue;
2555 *end = '\0'; /* nul term for easy access. */
2556 } else {
2557 /* New format. */
2558 ip = strstr(l,"ip="); if (!ip) continue;
2559 ip += 3; /* Now ip points to start of ip address. */
2560 port = strstr(l,"port="); if (!port) continue;
2561 port += 5; /* Now port points to start of port number. */
2562 /* Nul term both fields for easy access. */
2563 end = strchr(ip,','); if (end) *end = '\0';
2564 end = strchr(port,','); if (end) *end = '\0';
2565 }
2566
2567 /* Check if we already have this slave into our table,
2568 * otherwise add it. */
2569 if (sentinelRedisInstanceLookupSlave(ri,ip,atoi(port)) == NULL((void*)0)) {
2570 if ((slave = createSentinelRedisInstance(NULL((void*)0),SRI_SLAVE(1<<1),ip,
2571 atoi(port), ri->quorum, ri)) != NULL((void*)0))
2572 {
2573 sentinelEvent(LL_NOTICE2,"+slave",slave,"%@");
2574 sentinelFlushConfig();
2575 }
2576 }
2577 }
2578
2579 /* master_link_down_since_seconds:<seconds> */
2580 if (sdslen(l) >= 32 &&
2581 !memcmp(l,"master_link_down_since_seconds",30))
2582 {
2583 ri->master_link_down_time = strtoll(l+31,NULL((void*)0),10)*1000;
2584 }
2585
2586 /* role:<role> */
2587 if (sdslen(l) >= 11 && !memcmp(l,"role:master",11)) role = SRI_MASTER(1<<0);
2588 else if (sdslen(l) >= 10 && !memcmp(l,"role:slave",10)) role = SRI_SLAVE(1<<1);
2589
2590 if (role == SRI_SLAVE(1<<1)) {
2591 /* master_host:<host> */
2592 if (sdslen(l) >= 12 && !memcmp(l,"master_host:",12)) {
2593 if (ri->slave_master_host == NULL((void*)0) ||
2594 strcasecmp(l+12,ri->slave_master_host))
2595 {
2596 sdsfree(ri->slave_master_host);
2597 ri->slave_master_host = sdsnew(l+12);
2598 ri->slave_conf_change_time = mstime();
2599 }
2600 }
2601
2602 /* master_port:<port> */
2603 if (sdslen(l) >= 12 && !memcmp(l,"master_port:",12)) {
2604 int slave_master_port = atoi(l+12);
2605
2606 if (ri->slave_master_port != slave_master_port) {
2607 ri->slave_master_port = slave_master_port;
2608 ri->slave_conf_change_time = mstime();
2609 }
2610 }
2611
2612 /* master_link_status:<status> */
2613 if (sdslen(l) >= 19 && !memcmp(l,"master_link_status:",19)) {
2614 ri->slave_master_link_status =
2615 (strcasecmp(l+19,"up") == 0) ?
2616 SENTINEL_MASTER_LINK_STATUS_UP0 :
2617 SENTINEL_MASTER_LINK_STATUS_DOWN1;
2618 }
2619
2620 /* slave_priority:<priority> */
2621 if (sdslen(l) >= 15 && !memcmp(l,"slave_priority:",15))
2622 ri->slave_priority = atoi(l+15);
2623
2624 /* slave_repl_offset:<offset> */
2625 if (sdslen(l) >= 18 && !memcmp(l,"slave_repl_offset:",18))
2626 ri->slave_repl_offset = strtoull(l+18,NULL((void*)0),10);
2627 }
2628 }
2629 ri->info_refresh = mstime();
2630 sdsfreesplitres(lines,numlines);
2631
2632 /* ---------------------------- Acting half -----------------------------
2633 * Some things will not happen if sentinel.tilt is true, but some will
2634 * still be processed. */
2635
2636 /* Remember when the role changed. */
2637 if (role != ri->role_reported) {
2638 ri->role_reported_time = mstime();
2639 ri->role_reported = role;
2640 if (role == SRI_SLAVE(1<<1)) ri->slave_conf_change_time = mstime();
2641 /* Log the event with +role-change if the new role is coherent or
2642 * with -role-change if there is a mismatch with the current config. */
2643 sentinelEvent(LL_VERBOSE1,
2644 ((ri->flags & (SRI_MASTER(1<<0)|SRI_SLAVE(1<<1))) == role) ?
2645 "+role-change" : "-role-change",
2646 ri, "%@ new reported role is %s",
2647 role == SRI_MASTER(1<<0) ? "master" : "slave",
2648 ri->flags & SRI_MASTER(1<<0) ? "master" : "slave");
2649 }
2650
2651 /* None of the following conditions are processed when in tilt mode, so
2652 * return asap. */
2653 if (sentinel.tilt) return;
2654
2655 /* Handle master -> slave role switch. */
2656 if ((ri->flags & SRI_MASTER(1<<0)) && role == SRI_SLAVE(1<<1)) {
2657 /* Nothing to do, but masters claiming to be slaves are
2658 * considered to be unreachable by Sentinel, so eventually
2659 * a failover will be triggered. */
2660 }
2661
2662 /* Handle slave -> master role switch. */
2663 if ((ri->flags & SRI_SLAVE(1<<1)) && role == SRI_MASTER(1<<0)) {
2664 /* If this is a promoted slave we can change state to the
2665 * failover state machine. */
2666 if ((ri->flags & SRI_PROMOTED(1<<7)) &&
2667 (ri->master->flags & SRI_FAILOVER_IN_PROGRESS(1<<6)) &&
2668 (ri->master->failover_state ==
2669 SENTINEL_FAILOVER_STATE_WAIT_PROMOTION4))
2670 {
2671 /* Now that we are sure the slave was reconfigured as a master
2672 * set the master configuration epoch to the epoch we won the
2673 * election to perform this failover. This will force the other
2674 * Sentinels to update their config (assuming there is not
2675 * a newer one already available). */
2676 ri->master->config_epoch = ri->master->failover_epoch;
2677 ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES5;
2678 ri->master->failover_state_change_time = mstime();
2679 sentinelFlushConfig();
2680 sentinelEvent(LL_WARNING3,"+promoted-slave",ri,"%@");
2681 if (sentinel.simfailure_flags &
2682 SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION(1<<1))
2683 sentinelSimFailureCrash();
2684 sentinelEvent(LL_WARNING3,"+failover-state-reconf-slaves",
2685 ri->master,"%@");
2686 sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER(1<<17),
2687 "start",ri->master->addr,ri->addr);
2688 sentinelForceHelloUpdateForMaster(ri->master);
2689 } else {
2690 /* A slave turned into a master. We want to force our view and
2691 * reconfigure as slave. Wait some time after the change before
2692 * going forward, to receive new configs if any. */
2693 mstime_t wait_time = SENTINEL_PUBLISH_PERIOD2000*4;
2694
2695 if (!(ri->flags & SRI_PROMOTED(1<<7)) &&
2696 sentinelMasterLooksSane(ri->master) &&
2697 sentinelRedisInstanceNoDownFor(ri,wait_time) &&
2698 mstime() - ri->role_reported_time > wait_time)
2699 {
2700 int retval = sentinelSendSlaveOf(ri,ri->master->addr);
2701 if (retval == C_OK0)
2702 sentinelEvent(LL_NOTICE2,"+convert-to-slave",ri,"%@");
2703 }
2704 }
2705 }
2706
2707 /* Handle slaves replicating to a different master address. */
2708 if ((ri->flags & SRI_SLAVE(1<<1)) &&
2709 role == SRI_SLAVE(1<<1) &&
2710 (ri->slave_master_port != ri->master->addr->port ||
2711 !sentinelAddrEqualsHostname(ri->master->addr, ri->slave_master_host)))
2712 {
2713 mstime_t wait_time = ri->master->failover_timeout;
2714
2715 /* Make sure the master is sane before reconfiguring this instance
2716 * into a slave. */
2717 if (sentinelMasterLooksSane(ri->master) &&
2718 sentinelRedisInstanceNoDownFor(ri,wait_time) &&
2719 mstime() - ri->slave_conf_change_time > wait_time)
2720 {
2721 int retval = sentinelSendSlaveOf(ri,ri->master->addr);
2722 if (retval == C_OK0)
2723 sentinelEvent(LL_NOTICE2,"+fix-slave-config",ri,"%@");
2724 }
2725 }
2726
2727 /* Detect if the slave that is in the process of being reconfigured
2728 * changed state. */
2729 if ((ri->flags & SRI_SLAVE(1<<1)) && role == SRI_SLAVE(1<<1) &&
2730 (ri->flags & (SRI_RECONF_SENT(1<<8)|SRI_RECONF_INPROG(1<<9))))
2731 {
2732 /* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */
2733 if ((ri->flags & SRI_RECONF_SENT(1<<8)) &&
2734 ri->slave_master_host &&
2735 sentinelAddrEqualsHostname(ri->master->promoted_slave->addr,
2736 ri->slave_master_host) &&
2737 ri->slave_master_port == ri->master->promoted_slave->addr->port)
2738 {
2739 ri->flags &= ~SRI_RECONF_SENT(1<<8);
2740 ri->flags |= SRI_RECONF_INPROG(1<<9);
2741 sentinelEvent(LL_NOTICE2,"+slave-reconf-inprog",ri,"%@");
2742 }
2743
2744 /* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
2745 if ((ri->flags & SRI_RECONF_INPROG(1<<9)) &&
2746 ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP0)
2747 {
2748 ri->flags &= ~SRI_RECONF_INPROG(1<<9);
2749 ri->flags |= SRI_RECONF_DONE(1<<10);
2750 sentinelEvent(LL_NOTICE2,"+slave-reconf-done",ri,"%@");
2751 }
2752 }
2753}
2754
2755void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
2756 sentinelRedisInstance *ri = privdata;
2757 instanceLink *link = c->data;
2758 redisReply *r;
2759
2760 if (!reply || !link) return;
2761 link->pending_commands--;
2762 r = reply;
2763
2764 if (r->type == REDIS_REPLY_STRING1)
2765 sentinelRefreshInstanceInfo(ri,r->str);
2766}
2767
2768/* Just discard the reply. We use this when we are not monitoring the return
2769 * value of the command but its effects directly. */
2770void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
2771 instanceLink *link = c->data;
2772 UNUSED(reply)((void) reply);
2773 UNUSED(privdata)((void) privdata);
2774
2775 if (link) link->pending_commands--;
2776}
2777
2778void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
2779 sentinelRedisInstance *ri = privdata;
2780 instanceLink *link = c->data;
2781 redisReply *r;
2782
2783 if (!reply || !link) return;
2784 link->pending_commands--;
2785 r = reply;
2786
2787 if (r->type == REDIS_REPLY_STATUS5 ||
2788 r->type == REDIS_REPLY_ERROR6) {
2789 /* Update the "instance available" field only if this is an
2790 * acceptable reply. */
2791 if (strncmp(r->str,"PONG",4) == 0 ||
2792 strncmp(r->str,"LOADING",7) == 0 ||
2793 strncmp(r->str,"MASTERDOWN",10) == 0)
2794 {
2795 link->last_avail_time = mstime();
2796 link->act_ping_time = 0; /* Flag the pong as received. */
2797 } else {
2798 /* Send a SCRIPT KILL command if the instance appears to be
2799 * down because of a busy script. */
2800 if (strncmp(r->str,"BUSY",4) == 0 &&
2801 (ri->flags & SRI_S_DOWN(1<<3)) &&
2802 !(ri->flags & SRI_SCRIPT_KILL_SENT(1<<12)))
2803 {
2804 if (redisAsyncCommand(ri->link->cc,
2805 sentinelDiscardReplyCallback, ri,
2806 "%s KILL",
2807 sentinelInstanceMapCommand(ri,"SCRIPT")) == C_OK0)
2808 {
2809 ri->link->pending_commands++;
2810 }
2811 ri->flags |= SRI_SCRIPT_KILL_SENT(1<<12);
2812 }
2813 }
2814 }
2815 link->last_pong_time = mstime();
2816}
2817
2818/* This is called when we get the reply about the PUBLISH command we send
2819 * to the master to advertise this sentinel. */
2820void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
2821 sentinelRedisInstance *ri = privdata;
2822 instanceLink *link = c->data;
2823 redisReply *r;
2824
2825 if (!reply || !link) return;
2826 link->pending_commands--;
2827 r = reply;
2828
2829 /* Only update pub_time if we actually published our message. Otherwise
2830 * we'll retry again in 100 milliseconds. */
2831 if (r->type != REDIS_REPLY_ERROR6)
2832 ri->last_pub_time = mstime();
2833}
2834
2835/* Process a hello message received via Pub/Sub in master or slave instance,
2836 * or sent directly to this sentinel via the (fake) PUBLISH command of Sentinel.
2837 *
2838 * If the master name specified in the message is not known, the message is
2839 * discarded. */
2840void sentinelProcessHelloMessage(char *hello, int hello_len) {
2841 /* Format is composed of 8 tokens:
2842 * 0=ip,1=port,2=runid,3=current_epoch,4=master_name,
2843 * 5=master_ip,6=master_port,7=master_config_epoch. */
2844 int numtokens, port, removed, master_port;
2845 uint64_t current_epoch, master_config_epoch;
2846 char **token = sdssplitlen(hello, hello_len, ",", 1, &numtokens);
2847 sentinelRedisInstance *si, *master;
2848
2849 if (numtokens == 8) {
2850 /* Obtain a reference to the master this hello message is about */
2851 master = sentinelGetMasterByName(token[4]);
2852 if (!master) goto cleanup; /* Unknown master, skip the message. */
2853
2854 /* First, try to see if we already have this sentinel. */
2855 port = atoi(token[1]);
2856 master_port = atoi(token[6]);
2857 si = getSentinelRedisInstanceByAddrAndRunID(
2858 master->sentinels,token[0],port,token[2]);
2859 current_epoch = strtoull(token[3],NULL((void*)0),10);
2860 master_config_epoch = strtoull(token[7],NULL((void*)0),10);
2861
2862 if (!si) {
2863 /* If not, remove all the sentinels that have the same runid
2864 * because there was an address change, and add the same Sentinel
2865 * with the new address back. */
2866 removed = removeMatchingSentinelFromMaster(master,token[2]);
2867 if (removed) {
2868 sentinelEvent(LL_NOTICE2,"+sentinel-address-switch",master,
2869 "%@ ip %s port %d for %s", token[0],port,token[2]);
2870 } else {
2871 /* Check if there is another Sentinel with the same address this
2872 * new one is reporting. What we do if this happens is to set its
2873 * port to 0, to signal the address is invalid. We'll update it
2874 * later if we get an HELLO message. */
2875 sentinelRedisInstance *other =
2876 getSentinelRedisInstanceByAddrAndRunID(
2877 master->sentinels, token[0],port,NULL((void*)0));
2878 if (other) {
2879 sentinelEvent(LL_NOTICE2,"+sentinel-invalid-addr",other,"%@");
2880 other->addr->port = 0; /* It means: invalid address. */
2881 sentinelUpdateSentinelAddressInAllMasters(other);
2882 }
2883 }
2884
2885 /* Add the new sentinel. */
2886 si = createSentinelRedisInstance(token[2],SRI_SENTINEL(1<<2),
2887 token[0],port,master->quorum,master);
2888
2889 if (si) {
2890 if (!removed) sentinelEvent(LL_NOTICE2,"+sentinel",si,"%@");
2891 /* The runid is NULL after a new instance creation and
2892 * for Sentinels we don't have a later chance to fill it,
2893 * so do it now. */
2894 si->runid = sdsnew(token[2]);
2895 sentinelTryConnectionSharing(si);
2896 if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
2897 sentinelFlushConfig();
2898 }
2899 }
2900
2901 /* Update local current_epoch if received current_epoch is greater.*/
2902 if (current_epoch > sentinel.current_epoch) {
2903 sentinel.current_epoch = current_epoch;
2904 sentinelFlushConfig();
2905 sentinelEvent(LL_WARNING3,"+new-epoch",master,"%llu",
2906 (unsigned long long) sentinel.current_epoch);
2907 }
2908
2909 /* Update master info if received configuration is newer. */
2910 if (si && master->config_epoch < master_config_epoch) {
2911 master->config_epoch = master_config_epoch;
2912 if (master_port != master->addr->port ||
2913 !sentinelAddrEqualsHostname(master->addr, token[5]))
2914 {
2915 sentinelAddr *old_addr;
2916
2917 sentinelEvent(LL_WARNING3,"+config-update-from",si,"%@");
2918 sentinelEvent(LL_WARNING3,"+switch-master",
2919 master,"%s %s %d %s %d",
2920 master->name,
2921 announceSentinelAddr(master->addr), master->addr->port,
2922 token[5], master_port);
2923
2924 old_addr = dupSentinelAddr(master->addr);
2925 sentinelResetMasterAndChangeAddress(master, token[5], master_port);
2926 sentinelCallClientReconfScript(master,
2927 SENTINEL_OBSERVER(1<<18),"start",
2928 old_addr,master->addr);
2929 releaseSentinelAddr(old_addr);
2930 }
2931 }
2932
2933 /* Update the state of the Sentinel. */
2934 if (si) si->last_hello_time = mstime();
2935 }
2936
2937cleanup:
2938 sdsfreesplitres(token,numtokens);
2939}
2940
2941
2942/* This is our Pub/Sub callback for the Hello channel. It's useful in order
2943 * to discover other sentinels attached at the same master. */
2944void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
2945 sentinelRedisInstance *ri = privdata;
2946 redisReply *r;
2947 UNUSED(c)((void) c);
2948
2949 if (!reply || !ri) return;
2950 r = reply;
2951
2952 /* Update the last activity in the pubsub channel. Note that since we
2953 * receive our messages as well this timestamp can be used to detect
2954 * if the link is probably disconnected even if it seems otherwise. */
2955 ri->link->pc_last_activity = mstime();
2956
2957 /* Sanity check in the reply we expect, so that the code that follows
2958 * can avoid to check for details. */
2959 if (r->type != REDIS_REPLY_ARRAY2 ||
2960 r->elements != 3 ||
2961 r->element[0]->type != REDIS_REPLY_STRING1 ||
2962 r->element[1]->type != REDIS_REPLY_STRING1 ||
2963 r->element[2]->type != REDIS_REPLY_STRING1 ||
2964 strcmp(r->element[0]->str,"message") != 0) return;
2965
2966 /* We are not interested in meeting ourselves */
2967 if (strstr(r->element[2]->str,sentinel.myid) != NULL((void*)0)) return;
2968
2969 sentinelProcessHelloMessage(r->element[2]->str, r->element[2]->len);
2970}
2971
2972/* Send a "Hello" message via Pub/Sub to the specified 'ri' Redis
2973 * instance in order to broadcast the current configuration for this
2974 * master, and to advertise the existence of this Sentinel at the same time.
2975 *
2976 * The message has the following format:
2977 *
2978 * sentinel_ip,sentinel_port,sentinel_runid,current_epoch,
2979 * master_name,master_ip,master_port,master_config_epoch.
2980 *
2981 * Returns C_OK if the PUBLISH was queued correctly, otherwise
2982 * C_ERR is returned. */
2983int sentinelSendHello(sentinelRedisInstance *ri) {
2984 char ip[NET_IP_STR_LEN46];
2985 char payload[NET_IP_STR_LEN46+1024];
2986 int retval;
2987 char *announce_ip;
2988 int announce_port;
2989 sentinelRedisInstance *master = (ri->flags & SRI_MASTER(1<<0)) ? ri : ri->master;
2990 sentinelAddr *master_addr = sentinelGetCurrentMasterAddress(master);
2991
2992 if (ri->link->disconnected) return C_ERR-1;
2993
2994 /* Use the specified announce address if specified, otherwise try to
2995 * obtain our own IP address. */
2996 if (sentinel.announce_ip) {
2997 announce_ip = sentinel.announce_ip;
2998 } else {
2999 if (anetFdToString(ri->link->cc->c.fd,ip,sizeof(ip),NULL((void*)0),FD_TO_SOCK_NAME1) == -1)
3000 return C_ERR-1;
3001 announce_ip = ip;
3002 }
3003 if (sentinel.announce_port) announce_port = sentinel.announce_port;
3004 else if (server.tls_replication && server.tls_port) announce_port = server.tls_port;
3005 else announce_port = server.port;
3006
3007 /* Format and send the Hello message. */
3008 snprintf(payload,sizeof(payload),
3009 "%s,%d,%s,%llu," /* Info about this sentinel. */
3010 "%s,%s,%d,%llu", /* Info about current master. */
3011 announce_ip, announce_port, sentinel.myid,
3012 (unsigned long long) sentinel.current_epoch,
3013 /* --- */
3014 master->name,announceSentinelAddr(master_addr),master_addr->port,
3015 (unsigned long long) master->config_epoch);
3016 retval = redisAsyncCommand(ri->link->cc,
3017 sentinelPublishReplyCallback, ri, "%s %s %s",
3018 sentinelInstanceMapCommand(ri,"PUBLISH"),
3019 SENTINEL_HELLO_CHANNEL"__sentinel__:hello",payload);
3020 if (retval != C_OK0) return C_ERR-1;
3021 ri->link->pending_commands++;
3022 return C_OK0;
3023}
3024
3025/* Reset last_pub_time in all the instances in the specified dictionary
3026 * in order to force the delivery of a Hello update ASAP. */
3027void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
3028 dictIterator *di;
3029 dictEntry *de;
3030
3031 di = dictGetSafeIterator(instances);
3032 while((de = dictNext(di)) != NULL((void*)0)) {
3033 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3034 if (ri->last_pub_time >= (SENTINEL_PUBLISH_PERIOD2000+1))
3035 ri->last_pub_time -= (SENTINEL_PUBLISH_PERIOD2000+1);
3036 }
3037 dictReleaseIterator(di);
3038}
3039
3040/* This function forces the delivery of a "Hello" message (see
3041 * sentinelSendHello() top comment for further information) to all the Redis
3042 * and Sentinel instances related to the specified 'master'.
3043 *
3044 * It is technically not needed since we send an update to every instance
3045 * with a period of SENTINEL_PUBLISH_PERIOD milliseconds, however when a
3046 * Sentinel upgrades a configuration it is a good idea to deliver an update
3047 * to the other Sentinels ASAP. */
3048int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
3049 if (!(master->flags & SRI_MASTER(1<<0))) return C_ERR-1;
3050 if (master->last_pub_time >= (SENTINEL_PUBLISH_PERIOD2000+1))
3051 master->last_pub_time -= (SENTINEL_PUBLISH_PERIOD2000+1);
3052 sentinelForceHelloUpdateDictOfRedisInstances(master->sentinels);
3053 sentinelForceHelloUpdateDictOfRedisInstances(master->slaves);
3054 return C_OK0;
3055}
3056
3057/* Send a PING to the specified instance and refresh the act_ping_time
3058 * if it is zero (that is, if we received a pong for the previous ping).
3059 *
3060 * On error zero is returned, and we can't consider the PING command
3061 * queued in the connection. */
3062int sentinelSendPing(sentinelRedisInstance *ri) {
3063 int retval = redisAsyncCommand(ri->link->cc,
3064 sentinelPingReplyCallback, ri, "%s",
3065 sentinelInstanceMapCommand(ri,"PING"));
3066 if (retval == C_OK0) {
3067 ri->link->pending_commands++;
3068 ri->link->last_ping_time = mstime();
3069 /* We update the active ping time only if we received the pong for
3070 * the previous ping, otherwise we are technically waiting since the
3071 * first ping that did not receive a reply. */
3072 if (ri->link->act_ping_time == 0)
3073 ri->link->act_ping_time = ri->link->last_ping_time;
3074 return 1;
3075 } else {
3076 return 0;
3077 }
3078}
3079
3080/* Send periodic PING, INFO, and PUBLISH to the Hello channel to
3081 * the specified master or slave instance. */
3082void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
3083 mstime_t now = mstime();
3084 mstime_t info_period, ping_period;
3085 int retval;
3086
3087 /* Return ASAP if we have already a PING or INFO already pending, or
3088 * in the case the instance is not properly connected. */
3089 if (ri->link->disconnected) return;
3090
3091 /* For INFO, PING, PUBLISH that are not critical commands to send we
3092 * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
3093 * want to use a lot of memory just because a link is not working
3094 * properly (note that anyway there is a redundant protection about this,
3095 * that is, the link will be disconnected and reconnected if a long
3096 * timeout condition is detected. */
3097 if (ri->link->pending_commands >=
3098 SENTINEL_MAX_PENDING_COMMANDS100 * ri->link->refcount) return;
3099
3100 /* If this is a slave of a master in O_DOWN condition we start sending
3101 * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
3102 * period. In this state we want to closely monitor slaves in case they
3103 * are turned into masters by another Sentinel, or by the sysadmin.
3104 *
3105 * Similarly we monitor the INFO output more often if the slave reports
3106 * to be disconnected from the master, so that we can have a fresh
3107 * disconnection time figure. */
3108 if ((ri->flags & SRI_SLAVE(1<<1)) &&
3109 ((ri->master->flags & (SRI_O_DOWN(1<<4)|SRI_FAILOVER_IN_PROGRESS(1<<6))) ||
3110 (ri->master_link_down_time != 0)))
3111 {
3112 info_period = 1000;
3113 } else {
3114 info_period = SENTINEL_INFO_PERIOD10000;
3115 }
3116
3117 /* We ping instances every time the last received pong is older than
3118 * the configured 'down-after-milliseconds' time, but every second
3119 * anyway if 'down-after-milliseconds' is greater than 1 second. */
3120 ping_period = ri->down_after_period;
3121 if (ping_period > SENTINEL_PING_PERIOD1000) ping_period = SENTINEL_PING_PERIOD1000;
3122
3123 /* Send INFO to masters and slaves, not sentinels. */
3124 if ((ri->flags & SRI_SENTINEL(1<<2)) == 0 &&
3125 (ri->info_refresh == 0 ||
3126 (now - ri->info_refresh) > info_period))
3127 {
3128 retval = redisAsyncCommand(ri->link->cc,
3129 sentinelInfoReplyCallback, ri, "%s",
3130 sentinelInstanceMapCommand(ri,"INFO"));
3131 if (retval == C_OK0) ri->link->pending_commands++;
3132 }
3133
3134 /* Send PING to all the three kinds of instances. */
3135 if ((now - ri->link->last_pong_time) > ping_period &&
3136 (now - ri->link->last_ping_time) > ping_period/2) {
3137 sentinelSendPing(ri);
3138 }
3139
3140 /* PUBLISH hello messages to all the three kinds of instances. */
3141 if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD2000) {
3142 sentinelSendHello(ri);
3143 }
3144}
3145
3146/* =========================== SENTINEL command ============================= */
3147
3148/* SENTINEL CONFIG SET <option> */
3149void sentinelConfigSetCommand(client *c) {
3150 robj *o = c->argv[3];
3151 robj *val = c->argv[4];
3152 long long numval;
3153 int drop_conns = 0;
3154
3155 if (!strcasecmp(o->ptr, "resolve-hostnames")) {
3156 if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt;
3157 sentinel.resolve_hostnames = numval;
3158 } else if (!strcasecmp(o->ptr, "announce-hostnames")) {
3159 if ((numval = yesnotoi(val->ptr)) == -1) goto badfmt;
3160 sentinel.announce_hostnames = numval;
3161 } else if (!strcasecmp(o->ptr, "announce-ip")) {
3162 if (sentinel.announce_ip) sdsfree(sentinel.announce_ip);
3163 sentinel.announce_ip = sdsnew(val->ptr);
3164 } else if (!strcasecmp(o->ptr, "announce-port")) {
3165 if (getLongLongFromObject(val, &numval) == C_ERR-1 ||
3166 numval < 0 || numval > 65535)
3167 goto badfmt;
3168 sentinel.announce_port = numval;
3169 } else if (!strcasecmp(o->ptr, "sentinel-user")) {
3170 sdsfree(sentinel.sentinel_auth_user);
3171 sentinel.sentinel_auth_user = sdsnew(val->ptr);
3172 drop_conns = 1;
3173 } else if (!strcasecmp(o->ptr, "sentinel-pass")) {
3174 sdsfree(sentinel.sentinel_auth_pass);
3175 sentinel.sentinel_auth_pass = sdsnew(val->ptr);
3176 drop_conns = 1;
3177 } else {
3178 addReplyErrorFormat(c, "Invalid argument '%s' to SENTINEL CONFIG SET",
3179 (char *) o->ptr);
3180 return;
3181 }
3182
3183 sentinelFlushConfig();
3184 addReply(c, shared.ok);
3185
3186 /* Drop Sentinel connections to initiate a reconnect if needed. */
3187 if (drop_conns)
3188 sentinelDropConnections();
3189
3190 return;
3191
3192badfmt:
3193 addReplyErrorFormat(c, "Invalid value '%s' to SENTINEL CONFIG SET '%s'",
3194 (char *) val->ptr, (char *) o->ptr);
3195}
3196
3197/* SENTINEL CONFIG GET <option> */
3198void sentinelConfigGetCommand(client *c) {
3199 robj *o = c->argv[3];
3200 const char *pattern = o->ptr;
3201 void *replylen = addReplyDeferredLen(c);
3202 int matches = 0;
3203
3204 if (stringmatch(pattern,"resolve-hostnames",1)) {
3205 addReplyBulkCString(c,"resolve-hostnames");
3206 addReplyBulkCString(c,sentinel.resolve_hostnames ? "yes" : "no");
3207 matches++;
3208 }
3209
3210 if (stringmatch(pattern, "announce-hostnames", 1)) {
3211 addReplyBulkCString(c,"announce-hostnames");
3212 addReplyBulkCString(c,sentinel.announce_hostnames ? "yes" : "no");
3213 matches++;
3214 }
3215
3216 if (stringmatch(pattern, "announce-ip", 1)) {
3217 addReplyBulkCString(c,"announce-ip");
3218 addReplyBulkCString(c,sentinel.announce_ip ? sentinel.announce_ip : "");
3219 matches++;
3220 }
3221
3222 if (stringmatch(pattern, "announce-port", 1)) {
3223 addReplyBulkCString(c, "announce-port");
3224 addReplyBulkLongLong(c, sentinel.announce_port);
3225 matches++;
3226 }
3227
3228 if (stringmatch(pattern, "sentinel-user", 1)) {
3229 addReplyBulkCString(c, "sentinel-user");
3230 addReplyBulkCString(c, sentinel.sentinel_auth_user ? sentinel.sentinel_auth_user : "");
3231 matches++;
3232 }
3233
3234 if (stringmatch(pattern, "sentinel-pass", 1)) {
3235 addReplyBulkCString(c, "sentinel-pass");
3236 addReplyBulkCString(c, sentinel.sentinel_auth_pass ? sentinel.sentinel_auth_pass : "");
3237 matches++;
3238 }
3239
3240 setDeferredMapLen(c, replylen, matches);
3241}
3242
3243const char *sentinelFailoverStateStr(int state) {
3244 switch(state) {
3245 case SENTINEL_FAILOVER_STATE_NONE0: return "none";
3246 case SENTINEL_FAILOVER_STATE_WAIT_START1: return "wait_start";
3247 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE2: return "select_slave";
3248 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE3: return "send_slaveof_noone";
3249 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION4: return "wait_promotion";
3250 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES5: return "reconf_slaves";
3251 case SENTINEL_FAILOVER_STATE_UPDATE_CONFIG6: return "update_config";
3252 default: return "unknown";
3253 }
3254}
3255
3256/* Redis instance to Redis protocol representation. */
3257void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
3258 char *flags = sdsempty();
3259 void *mbl;
3260 int fields = 0;
3261
3262 mbl = addReplyDeferredLen(c);
3263
3264 addReplyBulkCString(c,"name");
3265 addReplyBulkCString(c,ri->name);
3266 fields++;
3267
3268 addReplyBulkCString(c,"ip");
3269 addReplyBulkCString(c,announceSentinelAddr(ri->addr));
3270 fields++;
3271
3272 addReplyBulkCString(c,"port");
3273 addReplyBulkLongLong(c,ri->addr->port);
3274 fields++;
3275
3276 addReplyBulkCString(c,"runid");
3277 addReplyBulkCString(c,ri->runid ? ri->runid : "");
3278 fields++;
3279
3280 addReplyBulkCString(c,"flags");
3281 if (ri->flags & SRI_S_DOWN(1<<3)) flags = sdscat(flags,"s_down,");
3282 if (ri->flags & SRI_O_DOWN(1<<4)) flags = sdscat(flags,"o_down,");
3283 if (ri->flags & SRI_MASTER(1<<0)) flags = sdscat(flags,"master,");
3284 if (ri->flags & SRI_SLAVE(1<<1)) flags = sdscat(flags,"slave,");
3285 if (ri->flags & SRI_SENTINEL(1<<2)) flags = sdscat(flags,"sentinel,");
3286 if (ri->link->disconnected) flags = sdscat(flags,"disconnected,");
3287 if (ri->flags & SRI_MASTER_DOWN(1<<5)) flags = sdscat(flags,"master_down,");
3288 if (ri->flags & SRI_FAILOVER_IN_PROGRESS(1<<6))
3289 flags = sdscat(flags,"failover_in_progress,");
3290 if (ri->flags & SRI_PROMOTED(1<<7)) flags = sdscat(flags,"promoted,");
3291 if (ri->flags & SRI_RECONF_SENT(1<<8)) flags = sdscat(flags,"reconf_sent,");
3292 if (ri->flags & SRI_RECONF_INPROG(1<<9)) flags = sdscat(flags,"reconf_inprog,");
3293 if (ri->flags & SRI_RECONF_DONE(1<<10)) flags = sdscat(flags,"reconf_done,");
3294
3295 if (sdslen(flags) != 0) sdsrange(flags,0,-2); /* remove last "," */
3296 addReplyBulkCString(c,flags);
3297 sdsfree(flags);
3298 fields++;
3299
3300 addReplyBulkCString(c,"link-pending-commands");
3301 addReplyBulkLongLong(c,ri->link->pending_commands);
3302 fields++;
3303
3304 addReplyBulkCString(c,"link-refcount");
3305 addReplyBulkLongLong(c,ri->link->refcount);
3306 fields++;
3307
3308 if (ri->flags & SRI_FAILOVER_IN_PROGRESS(1<<6)) {
3309 addReplyBulkCString(c,"failover-state");
3310 addReplyBulkCString(c,(char*)sentinelFailoverStateStr(ri->failover_state));
3311 fields++;
3312 }
3313
3314 addReplyBulkCString(c,"last-ping-sent");
3315 addReplyBulkLongLong(c,
3316 ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
3317 fields++;
3318
3319 addReplyBulkCString(c,"last-ok-ping-reply");
3320 addReplyBulkLongLong(c,mstime() - ri->link->last_avail_time);
3321 fields++;
3322
3323 addReplyBulkCString(c,"last-ping-reply");
3324 addReplyBulkLongLong(c,mstime() - ri->link->last_pong_time);
3325 fields++;
3326
3327 if (ri->flags & SRI_S_DOWN(1<<3)) {
3328 addReplyBulkCString(c,"s-down-time");
3329 addReplyBulkLongLong(c,mstime()-ri->s_down_since_time);
3330 fields++;
3331 }
3332
3333 if (ri->flags & SRI_O_DOWN(1<<4)) {
3334 addReplyBulkCString(c,"o-down-time");
3335 addReplyBulkLongLong(c,mstime()-ri->o_down_since_time);
3336 fields++;
3337 }
3338
3339 addReplyBulkCString(c,"down-after-milliseconds");
3340 addReplyBulkLongLong(c,ri->down_after_period);
3341 fields++;
3342
3343 /* Masters and Slaves */
3344 if (ri->flags & (SRI_MASTER(1<<0)|SRI_SLAVE(1<<1))) {
3345 addReplyBulkCString(c,"info-refresh");
3346 addReplyBulkLongLong(c,mstime() - ri->info_refresh);
3347 fields++;
3348
3349 addReplyBulkCString(c,"role-reported");
3350 addReplyBulkCString(c, (ri->role_reported == SRI_MASTER(1<<0)) ? "master" :
3351 "slave");
3352 fields++;
3353
3354 addReplyBulkCString(c,"role-reported-time");
3355 addReplyBulkLongLong(c,mstime() - ri->role_reported_time);
3356 fields++;
3357 }
3358
3359 /* Only masters */
3360 if (ri->flags & SRI_MASTER(1<<0)) {
3361 addReplyBulkCString(c,"config-epoch");
3362 addReplyBulkLongLong(c,ri->config_epoch);
3363 fields++;
3364
3365 addReplyBulkCString(c,"num-slaves");
3366 addReplyBulkLongLong(c,dictSize(ri->slaves)((ri->slaves)->ht[0].used+(ri->slaves)->ht[1].used
)
);
3367 fields++;
3368
3369 addReplyBulkCString(c,"num-other-sentinels");
3370 addReplyBulkLongLong(c,dictSize(ri->sentinels)((ri->sentinels)->ht[0].used+(ri->sentinels)->ht[
1].used)
);
3371 fields++;
3372
3373 addReplyBulkCString(c,"quorum");
3374 addReplyBulkLongLong(c,ri->quorum);
3375 fields++;
3376
3377 addReplyBulkCString(c,"failover-timeout");
3378 addReplyBulkLongLong(c,ri->failover_timeout);
3379 fields++;
3380
3381 addReplyBulkCString(c,"parallel-syncs");
3382 addReplyBulkLongLong(c,ri->parallel_syncs);
3383 fields++;
3384
3385 if (ri->notification_script) {
3386 addReplyBulkCString(c,"notification-script");
3387 addReplyBulkCString(c,ri->notification_script);
3388 fields++;
3389 }
3390
3391 if (ri->client_reconfig_script) {
3392 addReplyBulkCString(c,"client-reconfig-script");
3393 addReplyBulkCString(c,ri->client_reconfig_script);
3394 fields++;
3395 }
3396 }
3397
3398 /* Only slaves */
3399 if (ri->flags & SRI_SLAVE(1<<1)) {
3400 addReplyBulkCString(c,"master-link-down-time");
3401 addReplyBulkLongLong(c,ri->master_link_down_time);
3402 fields++;
3403
3404 addReplyBulkCString(c,"master-link-status");
3405 addReplyBulkCString(c,
3406 (ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP0) ?
3407 "ok" : "err");
3408 fields++;
3409
3410 addReplyBulkCString(c,"master-host");
3411 addReplyBulkCString(c,
3412 ri->slave_master_host ? ri->slave_master_host : "?");
3413 fields++;
3414
3415 addReplyBulkCString(c,"master-port");
3416 addReplyBulkLongLong(c,ri->slave_master_port);
3417 fields++;
3418
3419 addReplyBulkCString(c,"slave-priority");
3420 addReplyBulkLongLong(c,ri->slave_priority);
3421 fields++;
3422
3423 addReplyBulkCString(c,"slave-repl-offset");
3424 addReplyBulkLongLong(c,ri->slave_repl_offset);
3425 fields++;
3426 }
3427
3428 /* Only sentinels */
3429 if (ri->flags & SRI_SENTINEL(1<<2)) {
3430 addReplyBulkCString(c,"last-hello-message");
3431 addReplyBulkLongLong(c,mstime() - ri->last_hello_time);
3432 fields++;
3433
3434 addReplyBulkCString(c,"voted-leader");
3435 addReplyBulkCString(c,ri->leader ? ri->leader : "?");
3436 fields++;
3437
3438 addReplyBulkCString(c,"voted-leader-epoch");
3439 addReplyBulkLongLong(c,ri->leader_epoch);
3440 fields++;
3441 }
3442
3443 setDeferredMapLen(c,mbl,fields);
3444}
3445
3446/* Output a number of instances contained inside a dictionary as
3447 * Redis protocol. */
3448void addReplyDictOfRedisInstances(client *c, dict *instances) {
3449 dictIterator *di;
3450 dictEntry *de;
3451
3452 di = dictGetIterator(instances);
3453 addReplyArrayLen(c,dictSize(instances)((instances)->ht[0].used+(instances)->ht[1].used));
3454 while((de = dictNext(di)) != NULL((void*)0)) {
3455 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3456
3457 addReplySentinelRedisInstance(c,ri);
3458 }
3459 dictReleaseIterator(di);
3460}
3461
3462/* Lookup the named master into sentinel.masters.
3463 * If the master is not found reply to the client with an error and returns
3464 * NULL. */
3465sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(client *c,
3466 robj *name)
3467{
3468 sentinelRedisInstance *ri;
3469
3470 ri = dictFetchValue(sentinel.masters,name->ptr);
3471 if (!ri) {
3472 addReplyError(c,"No such master with that name");
3473 return NULL((void*)0);
3474 }
3475 return ri;
3476}
3477
3478#define SENTINEL_ISQR_OK0 0
3479#define SENTINEL_ISQR_NOQUORUM(1<<0) (1<<0)
3480#define SENTINEL_ISQR_NOAUTH(1<<1) (1<<1)
3481int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
3482 dictIterator *di;
3483 dictEntry *de;
3484 int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
3485 int result = SENTINEL_ISQR_OK0;
3486 int voters = dictSize(master->sentinels)((master->sentinels)->ht[0].used+(master->sentinels)
->ht[1].used)
+1; /* Known Sentinels + myself. */
3487
3488 di = dictGetIterator(master->sentinels);
3489 while((de = dictNext(di)) != NULL((void*)0)) {
3490 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3491
3492 if (ri->flags & (SRI_S_DOWN(1<<3)|SRI_O_DOWN(1<<4))) continue;
3493 usable++;
3494 }
3495 dictReleaseIterator(di);
3496
3497 if (usable < (int)master->quorum) result |= SENTINEL_ISQR_NOQUORUM(1<<0);
3498 if (usable < voters/2+1) result |= SENTINEL_ISQR_NOAUTH(1<<1);
3499 if (usableptr) *usableptr = usable;
3500 return result;
3501}
3502
3503void sentinelCommand(client *c) {
3504 if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
3505 const char *help[] = {
3506"CKQUORUM <master-name>",
3507" Check if the current Sentinel configuration is able to reach the quorum",
3508" needed to failover a master and the majority needed to authorize the",
3509" failover.",
3510"CONFIG SET <param> <value>",
3511" Set a global Sentinel configuration parameter.",
3512"CONFIG GET <param>",
3513" Get global Sentinel configuration parameter.",
3514"GET-MASTER-ADDR-BY-NAME <master-name>",
3515" Return the ip and port number of the master with that name.",
3516"FAILOVER <master-name>",
3517" Manually failover a master node without asking for agreement from other",
3518" Sentinels",
3519"FLUSHCONFIG",
3520" Force Sentinel to rewrite its configuration on disk, including the current",
3521" Sentinel state.",
3522"INFO-CACHE <master-name>",
3523" Return last cached INFO output from masters and all its replicas.",
3524"IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>",
3525" Check if the master specified by ip:port is down from current Sentinel's",
3526" point of view.",
3527"MASTER <master-name>",
3528" Show the state and info of the specified master.",
3529"MASTERS",
3530" Show a list of monitored masters and their state.",
3531"MONITOR <name> <ip> <port> <quorum>",
3532" Start monitoring a new master with the specified name, ip, port and quorum.",
3533"MYID",
3534" Return the ID of the Sentinel instance.",
3535"PENDING-SCRIPTS",
3536" Get pending scripts information.",
3537"REMOVE <master-name>",
3538" Remove master from Sentinel's monitor list.",
3539"REPLICAS <master-name>",
3540" Show a list of replicas for this master and their state.",
3541"RESET <pattern>",
3542" Reset masters for specific master name matching this pattern.",
3543"SENTINELS <master-name>",
3544" Show a list of Sentinel instances for this master and their state.",
3545"SET <master-name> <option> <value>",
3546" Set configuration paramters for certain masters.",
3547"SIMULATE-FAILURE (CRASH-AFTER-ELECTION|CRASH-AFTER-PROMOTION|HELP)",
3548" Simulate a Sentinel crash.",
3549NULL((void*)0)
3550 };
3551 addReplyHelp(c, help);
3552 } else if (!strcasecmp(c->argv[1]->ptr,"masters")) {
3553 /* SENTINEL MASTERS */
3554 if (c->argc != 2) goto numargserr;
3555 addReplyDictOfRedisInstances(c,sentinel.masters);
3556 } else if (!strcasecmp(c->argv[1]->ptr,"master")) {
3557 /* SENTINEL MASTER <name> */
3558 sentinelRedisInstance *ri;
3559
3560 if (c->argc != 3) goto numargserr;
3561 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
3562 == NULL((void*)0)) return;
3563 addReplySentinelRedisInstance(c,ri);
3564 } else if (!strcasecmp(c->argv[1]->ptr,"slaves") ||
3565 !strcasecmp(c->argv[1]->ptr,"replicas"))
3566 {
3567 /* SENTINEL REPLICAS <master-name> */
3568 sentinelRedisInstance *ri;
3569
3570 if (c->argc != 3) goto numargserr;
3571 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL((void*)0))
3572 return;
3573 addReplyDictOfRedisInstances(c,ri->slaves);
3574 } else if (!strcasecmp(c->argv[1]->ptr,"sentinels")) {
3575 /* SENTINEL SENTINELS <master-name> */
3576 sentinelRedisInstance *ri;
3577
3578 if (c->argc != 3) goto numargserr;
3579 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL((void*)0))
3580 return;
3581 addReplyDictOfRedisInstances(c,ri->sentinels);
3582 } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
3583 /* SENTINEL MYID */
3584 addReplyBulkCBuffer(c,sentinel.myid,CONFIG_RUN_ID_SIZE40);
3585 } else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
3586 /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
3587 *
3588 * Arguments:
3589 *
3590 * ip and port are the ip and port of the master we want to be
3591 * checked by Sentinel. Note that the command will not check by
3592 * name but just by master, in theory different Sentinels may monitor
3593 * different masters with the same name.
3594 *
3595 * current-epoch is needed in order to understand if we are allowed
3596 * to vote for a failover leader or not. Each Sentinel can vote just
3597 * one time per epoch.
3598 *
3599 * runid is "*" if we are not seeking for a vote from the Sentinel
3600 * in order to elect the failover leader. Otherwise it is set to the
3601 * runid we want the Sentinel to vote if it did not already voted.
3602 */
3603 sentinelRedisInstance *ri;
3604 long long req_epoch;
3605 uint64_t leader_epoch = 0;
3606 char *leader = NULL((void*)0);
3607 long port;
3608 int isdown = 0;
3609
3610 if (c->argc != 6) goto numargserr;
3611 if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL((void*)0)) != C_OK0 ||
3612 getLongLongFromObjectOrReply(c,c->argv[4],&req_epoch,NULL((void*)0))
3613 != C_OK0)
3614 return;
3615 ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
3616 c->argv[2]->ptr,port,NULL((void*)0));
3617
3618 /* It exists? Is actually a master? Is subjectively down? It's down.
3619 * Note: if we are in tilt mode we always reply with "0". */
3620 if (!sentinel.tilt && ri && (ri->flags & SRI_S_DOWN(1<<3)) &&
3621 (ri->flags & SRI_MASTER(1<<0)))
3622 isdown = 1;
3623
3624 /* Vote for the master (or fetch the previous vote) if the request
3625 * includes a runid, otherwise the sender is not seeking for a vote. */
3626 if (ri && ri->flags & SRI_MASTER(1<<0) && strcasecmp(c->argv[5]->ptr,"*")) {
3627 leader = sentinelVoteLeader(ri,(uint64_t)req_epoch,
3628 c->argv[5]->ptr,
3629 &leader_epoch);
3630 }
3631
3632 /* Reply with a three-elements multi-bulk reply:
3633 * down state, leader, vote epoch. */
3634 addReplyArrayLen(c,3);
3635 addReply(c, isdown ? shared.cone : shared.czero);
3636 addReplyBulkCString(c, leader ? leader : "*");
3637 addReplyLongLong(c, (long long)leader_epoch);
3638 if (leader) sdsfree(leader);
3639 } else if (!strcasecmp(c->argv[1]->ptr,"reset")) {
3640 /* SENTINEL RESET <pattern> */
3641 if (c->argc != 3) goto numargserr;
3642 addReplyLongLong(c,sentinelResetMastersByPattern(c->argv[2]->ptr,SENTINEL_GENERATE_EVENT(1<<16)));
3643 } else if (!strcasecmp(c->argv[1]->ptr,"get-master-addr-by-name")) {
3644 /* SENTINEL GET-MASTER-ADDR-BY-NAME <master-name> */
3645 sentinelRedisInstance *ri;
3646
3647 if (c->argc != 3) goto numargserr;
3648 ri = sentinelGetMasterByName(c->argv[2]->ptr);
3649 if (ri == NULL((void*)0)) {
3650 addReplyNullArray(c);
3651 } else {
3652 sentinelAddr *addr = sentinelGetCurrentMasterAddress(ri);
3653
3654 addReplyArrayLen(c,2);
3655 addReplyBulkCString(c,announceSentinelAddr(addr));
3656 addReplyBulkLongLong(c,addr->port);
3657 }
3658 } else if (!strcasecmp(c->argv[1]->ptr,"failover")) {
3659 /* SENTINEL FAILOVER <master-name> */
3660 sentinelRedisInstance *ri;
3661
3662 if (c->argc != 3) goto numargserr;
3663 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL((void*)0))
3664 return;
3665 if (ri->flags & SRI_FAILOVER_IN_PROGRESS(1<<6)) {
3666 addReplySds(c,sdsnew("-INPROG Failover already in progress\r\n"));
3667 return;
3668 }
3669 if (sentinelSelectSlave(ri) == NULL((void*)0)) {
3670 addReplySds(c,sdsnew("-NOGOODSLAVE No suitable replica to promote\r\n"));
3671 return;
3672 }
3673 serverLog(LL_WARNING3,"Executing user requested FAILOVER of '%s'",
3674 ri->name);
3675 sentinelStartFailover(ri);
3676 ri->flags |= SRI_FORCE_FAILOVER(1<<11);
3677 addReply(c,shared.ok);
3678 } else if (!strcasecmp(c->argv[1]->ptr,"pending-scripts")) {
3679 /* SENTINEL PENDING-SCRIPTS */
3680
3681 if (c->argc != 2) goto numargserr;
3682 sentinelPendingScriptsCommand(c);
3683 } else if (!strcasecmp(c->argv[1]->ptr,"monitor")) {
3684 /* SENTINEL MONITOR <name> <ip> <port> <quorum> */
3685 sentinelRedisInstance *ri;
3686 long quorum, port;
3687 char ip[NET_IP_STR_LEN46];
3688
3689 if (c->argc != 6) goto numargserr;
3690 if (getLongFromObjectOrReply(c,c->argv[5],&quorum,"Invalid quorum")
3691 != C_OK0) return;
3692 if (getLongFromObjectOrReply(c,c->argv[4],&port,"Invalid port")
3693 != C_OK0) return;
3694
3695 if (quorum <= 0) {
3696 addReplyError(c, "Quorum must be 1 or greater.");
3697 return;
3698 }
3699
3700 /* If resolve-hostnames is used, actual DNS resolution may take place.
3701 * Otherwise just validate address.
3702 */
3703 if (anetResolve(NULL((void*)0),c->argv[3]->ptr,ip,sizeof(ip),
3704 sentinel.resolve_hostnames ? ANET_NONE0 : ANET_IP_ONLY(1<<0)) == ANET_ERR-1) {
3705 addReplyError(c, "Invalid IP address or hostname specified");
3706 return;
3707 }
3708
3709 /* Parameters are valid. Try to create the master instance. */
3710 ri = createSentinelRedisInstance(c->argv[2]->ptr,SRI_MASTER(1<<0),
3711 c->argv[3]->ptr,port,quorum,NULL((void*)0));
3712 if (ri == NULL((void*)0)) {
3713 switch(errno(*__errno_location ())) {
3714 case EBUSY16:
3715 addReplyError(c,"Duplicated master name");
3716 break;
3717 case EINVAL22:
3718 addReplyError(c,"Invalid port number");
3719 break;
3720 default:
3721 addReplyError(c,"Unspecified error adding the instance");
3722 break;
3723 }
3724 } else {
3725 sentinelFlushConfig();
3726 sentinelEvent(LL_WARNING3,"+monitor",ri,"%@ quorum %d",ri->quorum);
3727 addReply(c,shared.ok);
3728 }
3729 } else if (!strcasecmp(c->argv[1]->ptr,"flushconfig")) {
3730 if (c->argc != 2) goto numargserr;
3731 sentinelFlushConfig();
3732 addReply(c,shared.ok);
3733 return;
3734 } else if (!strcasecmp(c->argv[1]->ptr,"remove")) {
3735 /* SENTINEL REMOVE <name> */
3736 sentinelRedisInstance *ri;
3737
3738 if (c->argc != 3) goto numargserr;
3739 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
3740 == NULL((void*)0)) return;
3741 sentinelEvent(LL_WARNING3,"-monitor",ri,"%@");
3742 dictDelete(sentinel.masters,c->argv[2]->ptr);
3743 sentinelFlushConfig();
3744 addReply(c,shared.ok);
3745 } else if (!strcasecmp(c->argv[1]->ptr,"ckquorum")) {
3746 /* SENTINEL CKQUORUM <name> */
3747 sentinelRedisInstance *ri;
3748 int usable;
3749
3750 if (c->argc != 3) goto numargserr;
3751 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
3752 == NULL((void*)0)) return;
3753 int result = sentinelIsQuorumReachable(ri,&usable);
3754 if (result == SENTINEL_ISQR_OK0) {
3755 addReplySds(c, sdscatfmt(sdsempty(),
3756 "+OK %i usable Sentinels. Quorum and failover authorization "
3757 "can be reached\r\n",usable));
3758 } else {
3759 sds e = sdscatfmt(sdsempty(),
3760 "-NOQUORUM %i usable Sentinels. ",usable);
3761 if (result & SENTINEL_ISQR_NOQUORUM(1<<0))
3762 e = sdscat(e,"Not enough available Sentinels to reach the"
3763 " specified quorum for this master");
3764 if (result & SENTINEL_ISQR_NOAUTH(1<<1)) {
3765 if (result & SENTINEL_ISQR_NOQUORUM(1<<0)) e = sdscat(e,". ");
3766 e = sdscat(e, "Not enough available Sentinels to reach the"
3767 " majority and authorize a failover");
3768 }
3769 e = sdscat(e,"\r\n");
3770 addReplySds(c,e);
3771 }
3772 } else if (!strcasecmp(c->argv[1]->ptr,"set")) {
3773 if (c->argc < 3) goto numargserr;
3774 sentinelSetCommand(c);
3775 } else if (!strcasecmp(c->argv[1]->ptr,"config")) {
3776 if (c->argc < 3) goto numargserr;
3777 if (!strcasecmp(c->argv[2]->ptr,"set") && c->argc == 5)
3778 sentinelConfigSetCommand(c);
3779 else if (!strcasecmp(c->argv[2]->ptr,"get") && c->argc == 4)
3780 sentinelConfigGetCommand(c);
3781 else
3782 addReplyError(c, "Only SENTINEL CONFIG GET <option> / SET <option> <value> are supported.");
3783 } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
3784 /* SENTINEL INFO-CACHE <name> */
3785 if (c->argc < 2) goto numargserr;
3786 mstime_t now = mstime();
3787
3788 /* Create an ad-hoc dictionary type so that we can iterate
3789 * a dictionary composed of just the master groups the user
3790 * requested. */
3791 dictType copy_keeper = instancesDictType;
3792 copy_keeper.valDestructor = NULL((void*)0);
3793 dict *masters_local = sentinel.masters;
3794 if (c->argc > 2) {
3795 masters_local = dictCreate(&copy_keeper, NULL((void*)0));
3796
3797 for (int i = 2; i < c->argc; i++) {
3798 sentinelRedisInstance *ri;
3799 ri = sentinelGetMasterByName(c->argv[i]->ptr);
3800 if (!ri) continue; /* ignore non-existing names */
3801 dictAdd(masters_local, ri->name, ri);
3802 }
3803 }
3804
3805 /* Reply format:
3806 * 1.) master name
3807 * 2.) 1.) info from master
3808 * 2.) info from replica
3809 * ...
3810 * 3.) other master name
3811 * ...
3812 */
3813 addReplyArrayLen(c,dictSize(masters_local)((masters_local)->ht[0].used+(masters_local)->ht[1].used
)
* 2);
3814
3815 dictIterator *di;
3816 dictEntry *de;
3817 di = dictGetIterator(masters_local);
3818 while ((de = dictNext(di)) != NULL((void*)0)) {
3819 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3820 addReplyBulkCBuffer(c,ri->name,strlen(ri->name));
3821 addReplyArrayLen(c,dictSize(ri->slaves)((ri->slaves)->ht[0].used+(ri->slaves)->ht[1].used
)
+ 1); /* +1 for self */
3822 addReplyArrayLen(c,2);
3823 addReplyLongLong(c, now - ri->info_refresh);
3824 if (ri->info)
3825 addReplyBulkCBuffer(c,ri->info,sdslen(ri->info));
3826 else
3827 addReplyNull(c);
3828
3829 dictIterator *sdi;
3830 dictEntry *sde;
3831 sdi = dictGetIterator(ri->slaves);
3832 while ((sde = dictNext(sdi)) != NULL((void*)0)) {
3833 sentinelRedisInstance *sri = dictGetVal(sde)((sde)->v.val);
3834 addReplyArrayLen(c,2);
3835 addReplyLongLong(c, now - sri->info_refresh);
3836 if (sri->info)
3837 addReplyBulkCBuffer(c,sri->info,sdslen(sri->info));
3838 else
3839 addReplyNull(c);
3840 }
3841 dictReleaseIterator(sdi);
3842 }
3843 dictReleaseIterator(di);
3844 if (masters_local != sentinel.masters) dictRelease(masters_local);
3845 } else if (!strcasecmp(c->argv[1]->ptr,"simulate-failure")) {
3846 /* SENTINEL SIMULATE-FAILURE <flag> <flag> ... <flag> */
3847 int j;
3848
3849 sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE0;
3850 for (j = 2; j < c->argc; j++) {
3851 if (!strcasecmp(c->argv[j]->ptr,"crash-after-election")) {
3852 sentinel.simfailure_flags |=
3853 SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION(1<<0);
3854 serverLog(LL_WARNING3,"Failure simulation: this Sentinel "
3855 "will crash after being successfully elected as failover "
3856 "leader");
3857 } else if (!strcasecmp(c->argv[j]->ptr,"crash-after-promotion")) {
3858 sentinel.simfailure_flags |=
3859 SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION(1<<1);
3860 serverLog(LL_WARNING3,"Failure simulation: this Sentinel "
3861 "will crash after promoting the selected replica to master");
3862 } else if (!strcasecmp(c->argv[j]->ptr,"help")) {
3863 addReplyArrayLen(c,2);
3864 addReplyBulkCString(c,"crash-after-election");
3865 addReplyBulkCString(c,"crash-after-promotion");
3866 } else {
3867 addReplyError(c,"Unknown failure simulation specified");
3868 return;
3869 }
3870 }
3871 addReply(c,shared.ok);
3872 } else {
3873 addReplySubcommandSyntaxError(c);
3874 }
3875 return;
3876
3877numargserr:
3878 addReplyErrorFormat(c,"Wrong number of arguments for 'sentinel %s'",
3879 (char*)c->argv[1]->ptr);
3880}
3881
3882#define info_section_from_redis(section_name)do { if (defsections || allsections || !strcasecmp(section,section_name
)) { sds redissection; if (sections++) info = sdscat(info,"\r\n"
); redissection = genRedisInfoString(section_name); info = sdscatlen
(info,redissection,sdslen(redissection)); sdsfree(redissection
); } } while(0)
do { \
3883 if (defsections || allsections || !strcasecmp(section,section_name)) { \
3884 sds redissection; \
3885 if (sections++) info = sdscat(info,"\r\n"); \
3886 redissection = genRedisInfoString(section_name); \
3887 info = sdscatlen(info,redissection,sdslen(redissection)); \
3888 sdsfree(redissection); \
3889 } \
3890} while(0)
3891
3892/* SENTINEL INFO [section] */
3893void sentinelInfoCommand(client *c) {
3894 if (c->argc > 2) {
3895 addReplyErrorObject(c,shared.syntaxerr);
3896 return;
3897 }
3898
3899 int defsections = 0, allsections = 0;
3900 char *section = c->argc == 2 ? c->argv[1]->ptr : NULL((void*)0);
3901 if (section) {
3902 allsections = !strcasecmp(section,"all");
3903 defsections = !strcasecmp(section,"default");
3904 } else {
3905 defsections = 1;
3906 }
3907
3908 int sections = 0;
3909 sds info = sdsempty();
3910
3911 info_section_from_redis("server")do { if (defsections || allsections || !strcasecmp(section,"server"
)) { sds redissection; if (sections++) info = sdscat(info,"\r\n"
); redissection = genRedisInfoString("server"); info = sdscatlen
(info,redissection,sdslen(redissection)); sdsfree(redissection
); } } while(0)
;
3912 info_section_from_redis("clients")do { if (defsections || allsections || !strcasecmp(section,"clients"
)) { sds redissection; if (sections++) info = sdscat(info,"\r\n"
); redissection = genRedisInfoString("clients"); info = sdscatlen
(info,redissection,sdslen(redissection)); sdsfree(redissection
); } } while(0)
;
3913 info_section_from_redis("cpu")do { if (defsections || allsections || !strcasecmp(section,"cpu"
)) { sds redissection; if (sections++) info = sdscat(info,"\r\n"
); redissection = genRedisInfoString("cpu"); info = sdscatlen
(info,redissection,sdslen(redissection)); sdsfree(redissection
); } } while(0)
;
3914 info_section_from_redis("stats")do { if (defsections || allsections || !strcasecmp(section,"stats"
)) { sds redissection; if (sections++) info = sdscat(info,"\r\n"
); redissection = genRedisInfoString("stats"); info = sdscatlen
(info,redissection,sdslen(redissection)); sdsfree(redissection
); } } while(0)
;
3915
3916 if (defsections || allsections || !strcasecmp(section,"sentinel")) {
3917 dictIterator *di;
3918 dictEntry *de;
3919 int master_id = 0;
3920
3921 if (sections++) info = sdscat(info,"\r\n");
3922 info = sdscatprintf(info,
3923 "# Sentinel\r\n"
3924 "sentinel_masters:%lu\r\n"
3925 "sentinel_tilt:%d\r\n"
3926 "sentinel_running_scripts:%d\r\n"
3927 "sentinel_scripts_queue_length:%ld\r\n"
3928 "sentinel_simulate_failure_flags:%lu\r\n",
3929 dictSize(sentinel.masters)((sentinel.masters)->ht[0].used+(sentinel.masters)->ht[
1].used)
,
3930 sentinel.tilt,
3931 sentinel.running_scripts,
3932 listLength(sentinel.scripts_queue)((sentinel.scripts_queue)->len),
3933 sentinel.simfailure_flags);
3934
3935 di = dictGetIterator(sentinel.masters);
3936 while((de = dictNext(di)) != NULL((void*)0)) {
3937 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3938 char *status = "ok";
3939
3940 if (ri->flags & SRI_O_DOWN(1<<4)) status = "odown";
3941 else if (ri->flags & SRI_S_DOWN(1<<3)) status = "sdown";
3942 info = sdscatprintf(info,
3943 "master%d:name=%s,status=%s,address=%s:%d,"
3944 "slaves=%lu,sentinels=%lu\r\n",
3945 master_id++, ri->name, status,
3946 announceSentinelAddr(ri->addr), ri->addr->port,
3947 dictSize(ri->slaves)((ri->slaves)->ht[0].used+(ri->slaves)->ht[1].used
)
,
3948 dictSize(ri->sentinels)((ri->sentinels)->ht[0].used+(ri->sentinels)->ht[
1].used)
+1);
3949 }
3950 dictReleaseIterator(di);
3951 }
3952
3953 addReplyBulkSds(c, info);
3954}
3955
3956/* Implements Sentinel version of the ROLE command. The output is
3957 * "sentinel" and the list of currently monitored master names. */
3958void sentinelRoleCommand(client *c) {
3959 dictIterator *di;
3960 dictEntry *de;
3961
3962 addReplyArrayLen(c,2);
3963 addReplyBulkCBuffer(c,"sentinel",8);
3964 addReplyArrayLen(c,dictSize(sentinel.masters)((sentinel.masters)->ht[0].used+(sentinel.masters)->ht[
1].used)
);
3965
3966 di = dictGetIterator(sentinel.masters);
3967 while((de = dictNext(di)) != NULL((void*)0)) {
3968 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
3969
3970 addReplyBulkCString(c,ri->name);
3971 }
3972 dictReleaseIterator(di);
3973}
3974
3975/* SENTINEL SET <mastername> [<option> <value> ...] */
3976void sentinelSetCommand(client *c) {
3977 sentinelRedisInstance *ri;
3978 int j, changes = 0;
3979 int badarg = 0; /* Bad argument position for error reporting. */
3980 char *option;
3981
3982 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
3983 == NULL((void*)0)) return;
3984
3985 /* Process option - value pairs. */
3986 for (j = 3; j < c->argc; j++) {
3987 int moreargs = (c->argc-1) - j;
3988 option = c->argv[j]->ptr;
3989 long long ll;
3990 int old_j = j; /* Used to know what to log as an event. */
3991
3992 if (!strcasecmp(option,"down-after-milliseconds") && moreargs > 0) {
3993 /* down-after-milliseconds <milliseconds> */
3994 robj *o = c->argv[++j];
3995 if (getLongLongFromObject(o,&ll) == C_ERR-1 || ll <= 0) {
3996 badarg = j;
3997 goto badfmt;
3998 }
3999 ri->down_after_period = ll;
4000 sentinelPropagateDownAfterPeriod(ri);
4001 changes++;
4002 } else if (!strcasecmp(option,"failover-timeout") && moreargs > 0) {
4003 /* failover-timeout <milliseconds> */
4004 robj *o = c->argv[++j];
4005 if (getLongLongFromObject(o,&ll) == C_ERR-1 || ll <= 0) {
4006 badarg = j;
4007 goto badfmt;
4008 }
4009 ri->failover_timeout = ll;
4010 changes++;
4011 } else if (!strcasecmp(option,"parallel-syncs") && moreargs > 0) {
4012 /* parallel-syncs <count> */
4013 robj *o = c->argv[++j];
4014 if (getLongLongFromObject(o,&ll) == C_ERR-1 || ll <= 0) {
4015 badarg = j;
4016 goto badfmt;
4017 }
4018 ri->parallel_syncs = ll;
4019 changes++;
4020 } else if (!strcasecmp(option,"notification-script") && moreargs > 0) {
4021 /* notification-script <path> */
4022 char *value = c->argv[++j]->ptr;
4023 if (sentinel.deny_scripts_reconfig) {
4024 addReplyError(c,
4025 "Reconfiguration of scripts path is denied for "
4026 "security reasons. Check the deny-scripts-reconfig "
4027 "configuration directive in your Sentinel configuration");
4028 goto seterr;
4029 }
4030
4031 if (strlen(value) && access(value,X_OK1) == -1) {
4032 addReplyError(c,
4033 "Notification script seems non existing or non executable");
4034 goto seterr;
4035 }
4036 sdsfree(ri->notification_script);
4037 ri->notification_script = strlen(value) ? sdsnew(value) : NULL((void*)0);
4038 changes++;
4039 } else if (!strcasecmp(option,"client-reconfig-script") && moreargs > 0) {
4040 /* client-reconfig-script <path> */
4041 char *value = c->argv[++j]->ptr;
4042 if (sentinel.deny_scripts_reconfig) {
4043 addReplyError(c,
4044 "Reconfiguration of scripts path is denied for "
4045 "security reasons. Check the deny-scripts-reconfig "
4046 "configuration directive in your Sentinel configuration");
4047 goto seterr;
4048 }
4049
4050 if (strlen(value) && access(value,X_OK1) == -1) {
4051 addReplyError(c,
4052 "Client reconfiguration script seems non existing or "
4053 "non executable");
4054 goto seterr;
4055 }
4056 sdsfree(ri->client_reconfig_script);
4057 ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL((void*)0);
4058 changes++;
4059 } else if (!strcasecmp(option,"auth-pass") && moreargs > 0) {
4060 /* auth-pass <password> */
4061 char *value = c->argv[++j]->ptr;
4062 sdsfree(ri->auth_pass);
4063 ri->auth_pass = strlen(value) ? sdsnew(value) : NULL((void*)0);
4064 changes++;
4065 } else if (!strcasecmp(option,"auth-user") && moreargs > 0) {
4066 /* auth-user <username> */
4067 char *value = c->argv[++j]->ptr;
4068 sdsfree(ri->auth_user);
4069 ri->auth_user = strlen(value) ? sdsnew(value) : NULL((void*)0);
4070 changes++;
4071 } else if (!strcasecmp(option,"quorum") && moreargs > 0) {
4072 /* quorum <count> */
4073 robj *o = c->argv[++j];
4074 if (getLongLongFromObject(o,&ll) == C_ERR-1 || ll <= 0) {
4075 badarg = j;
4076 goto badfmt;
4077 }
4078 ri->quorum = ll;
4079 changes++;
4080 } else if (!strcasecmp(option,"rename-command") && moreargs > 1) {
4081 /* rename-command <oldname> <newname> */
4082 sds oldname = c->argv[++j]->ptr;
4083 sds newname = c->argv[++j]->ptr;
4084
4085 if ((sdslen(oldname) == 0) || (sdslen(newname) == 0)) {
4086 badarg = sdslen(newname) ? j-1 : j;
4087 goto badfmt;
4088 }
4089
4090 /* Remove any older renaming for this command. */
4091 dictDelete(ri->renamed_commands,oldname);
4092
4093 /* If the target name is the same as the source name there
4094 * is no need to add an entry mapping to itself. */
4095 if (!dictSdsKeyCaseCompare(NULL((void*)0),oldname,newname)) {
4096 oldname = sdsdup(oldname);
4097 newname = sdsdup(newname);
4098 dictAdd(ri->renamed_commands,oldname,newname);
4099 }
4100 changes++;
4101 } else {
4102 addReplyErrorFormat(c,"Unknown option or number of arguments for "
4103 "SENTINEL SET '%s'", option);
4104 goto seterr;
4105 }
4106
4107 /* Log the event. */
4108 int numargs = j-old_j+1;
4109 switch(numargs) {
4110 case 2:
4111 sentinelEvent(LL_WARNING3,"+set",ri,"%@ %s %s",c->argv[old_j]->ptr,
4112 c->argv[old_j+1]->ptr);
4113 break;
4114 case 3:
4115 sentinelEvent(LL_WARNING3,"+set",ri,"%@ %s %s %s",c->argv[old_j]->ptr,
4116 c->argv[old_j+1]->ptr,
4117 c->argv[old_j+2]->ptr);
4118 break;
4119 default:
4120 sentinelEvent(LL_WARNING3,"+set",ri,"%@ %s",c->argv[old_j]->ptr);
4121 break;
4122 }
4123 }
4124
4125 if (changes) sentinelFlushConfig();
4126 addReply(c,shared.ok);
4127 return;
4128
4129badfmt: /* Bad format errors */
4130 addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
4131 (char*)c->argv[badarg]->ptr,option);
4132seterr:
4133 if (changes) sentinelFlushConfig();
4134 return;
4135}
4136
4137/* Our fake PUBLISH command: it is actually useful only to receive hello messages
4138 * from the other sentinel instances, and publishing to a channel other than
4139 * SENTINEL_HELLO_CHANNEL is forbidden.
4140 *
4141 * Because we have a Sentinel PUBLISH, the code to send hello messages is the same
4142 * for all the three kind of instances: masters, slaves, sentinels. */
4143void sentinelPublishCommand(client *c) {
4144 if (strcmp(c->argv[1]->ptr,SENTINEL_HELLO_CHANNEL"__sentinel__:hello")) {
4145 addReplyError(c, "Only HELLO messages are accepted by Sentinel instances.");
4146 return;
4147 }
4148 sentinelProcessHelloMessage(c->argv[2]->ptr,sdslen(c->argv[2]->ptr));
4149 addReplyLongLong(c,1);
4150}
4151
4152/* ===================== SENTINEL availability checks ======================= */
4153
4154/* Is this instance down from our point of view? */
4155void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
4156 mstime_t elapsed = 0;
4157
4158 if (ri->link->act_ping_time)
4159 elapsed = mstime() - ri->link->act_ping_time;
4160 else if (ri->link->disconnected)
4161 elapsed = mstime() - ri->link->last_avail_time;
4162
4163 /* Check if we are in need for a reconnection of one of the
4164 * links, because we are detecting low activity.
4165 *
4166 * 1) Check if the command link seems connected, was connected not less
4167 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
4168 * pending ping for more than half the timeout. */
4169 if (ri->link->cc &&
4170 (mstime() - ri->link->cc_conn_time) >
4171 SENTINEL_MIN_LINK_RECONNECT_PERIOD15000 &&
4172 ri->link->act_ping_time != 0 && /* There is a pending ping... */
4173 /* The pending ping is delayed, and we did not receive
4174 * error replies as well. */
4175 (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
4176 (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
4177 {
4178 instanceLinkCloseConnection(ri->link,ri->link->cc);
4179 }
4180
4181 /* 2) Check if the pubsub link seems connected, was connected not less
4182 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
4183 * activity in the Pub/Sub channel for more than
4184 * SENTINEL_PUBLISH_PERIOD * 3.
4185 */
4186 if (ri->link->pc &&
4187 (mstime() - ri->link->pc_conn_time) >
4188 SENTINEL_MIN_LINK_RECONNECT_PERIOD15000 &&
4189 (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD2000*3))
4190 {
4191 instanceLinkCloseConnection(ri->link,ri->link->pc);
4192 }
4193
4194 /* Update the SDOWN flag. We believe the instance is SDOWN if:
4195 *
4196 * 1) It is not replying.
4197 * 2) We believe it is a master, it reports to be a slave for enough time
4198 * to meet the down_after_period, plus enough time to get two times
4199 * INFO report from the instance. */
4200 if (elapsed > ri->down_after_period ||
4201 (ri->flags & SRI_MASTER(1<<0) &&
4202 ri->role_reported == SRI_SLAVE(1<<1) &&
4203 mstime() - ri->role_reported_time >
4204 (ri->down_after_period+SENTINEL_INFO_PERIOD10000*2)))
4205 {
4206 /* Is subjectively down */
4207 if ((ri->flags & SRI_S_DOWN(1<<3)) == 0) {
4208 sentinelEvent(LL_WARNING3,"+sdown",ri,"%@");
4209 ri->s_down_since_time = mstime();
4210 ri->flags |= SRI_S_DOWN(1<<3);
4211 }
4212 } else {
4213 /* Is subjectively up */
4214 if (ri->flags & SRI_S_DOWN(1<<3)) {
4215 sentinelEvent(LL_WARNING3,"-sdown",ri,"%@");
4216 ri->flags &= ~(SRI_S_DOWN(1<<3)|SRI_SCRIPT_KILL_SENT(1<<12));
4217 }
4218 }
4219}
4220
4221/* Is this instance down according to the configured quorum?
4222 *
4223 * Note that ODOWN is a weak quorum, it only means that enough Sentinels
4224 * reported in a given time range that the instance was not reachable.
4225 * However messages can be delayed so there are no strong guarantees about
4226 * N instances agreeing at the same time about the down state. */
4227void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
4228 dictIterator *di;
4229 dictEntry *de;
4230 unsigned int quorum = 0, odown = 0;
4231
4232 if (master->flags & SRI_S_DOWN(1<<3)) {
4233 /* Is down for enough sentinels? */
4234 quorum = 1; /* the current sentinel. */
4235 /* Count all the other sentinels. */
4236 di = dictGetIterator(master->sentinels);
4237 while((de = dictNext(di)) != NULL((void*)0)) {
4238 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
4239
4240 if (ri->flags & SRI_MASTER_DOWN(1<<5)) quorum++;
4241 }
4242 dictReleaseIterator(di);
4243 if (quorum >= master->quorum) odown = 1;
4244 }
4245
4246 /* Set the flag according to the outcome. */
4247 if (odown) {
4248 if ((master->flags & SRI_O_DOWN(1<<4)) == 0) {
4249 sentinelEvent(LL_WARNING3,"+odown",master,"%@ #quorum %d/%d",
4250 quorum, master->quorum);
4251 master->flags |= SRI_O_DOWN(1<<4);
4252 master->o_down_since_time = mstime();
4253 }
4254 } else {
4255 if (master->flags & SRI_O_DOWN(1<<4)) {
4256 sentinelEvent(LL_WARNING3,"-odown",master,"%@");
4257 master->flags &= ~SRI_O_DOWN(1<<4);
4258 }
4259 }
4260}
4261
4262/* Receive the SENTINEL is-master-down-by-addr reply, see the
4263 * sentinelAskMasterStateToOtherSentinels() function for more information. */
4264void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
4265 sentinelRedisInstance *ri = privdata;
4266 instanceLink *link = c->data;
4267 redisReply *r;
4268
4269 if (!reply || !link) return;
4270 link->pending_commands--;
4271 r = reply;
4272
4273 /* Ignore every error or unexpected reply.
4274 * Note that if the command returns an error for any reason we'll
4275 * end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
4276 if (r->type == REDIS_REPLY_ARRAY2 && r->elements == 3 &&
4277 r->element[0]->type == REDIS_REPLY_INTEGER3 &&
4278 r->element[1]->type == REDIS_REPLY_STRING1 &&
4279 r->element[2]->type == REDIS_REPLY_INTEGER3)
4280 {
4281 ri->last_master_down_reply_time = mstime();
4282 if (r->element[0]->integer == 1) {
4283 ri->flags |= SRI_MASTER_DOWN(1<<5);
4284 } else {
4285 ri->flags &= ~SRI_MASTER_DOWN(1<<5);
4286 }
4287 if (strcmp(r->element[1]->str,"*")) {
4288 /* If the runid in the reply is not "*" the Sentinel actually
4289 * replied with a vote. */
4290 sdsfree(ri->leader);
4291 if ((long long)ri->leader_epoch != r->element[2]->integer)
4292 serverLog(LL_WARNING3,
4293 "%s voted for %s %llu", ri->name,
4294 r->element[1]->str,
4295 (unsigned long long) r->element[2]->integer);
4296 ri->leader = sdsnew(r->element[1]->str);
4297 ri->leader_epoch = r->element[2]->integer;
4298 }
4299 }
4300}
4301
4302/* If we think the master is down, we start sending
4303 * SENTINEL IS-MASTER-DOWN-BY-ADDR requests to other sentinels
4304 * in order to get the replies that allow to reach the quorum
4305 * needed to mark the master in ODOWN state and trigger a failover. */
4306#define SENTINEL_ASK_FORCED(1<<0) (1<<0)
4307void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
4308 dictIterator *di;
4309 dictEntry *de;
4310
4311 di = dictGetIterator(master->sentinels);
4312 while((de = dictNext(di)) != NULL((void*)0)) {
4313 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
4314 mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
4315 char port[32];
4316 int retval;
4317
4318 /* If the master state from other sentinel is too old, we clear it. */
4319 if (elapsed > SENTINEL_ASK_PERIOD1000*5) {
4320 ri->flags &= ~SRI_MASTER_DOWN(1<<5);
4321 sdsfree(ri->leader);
4322 ri->leader = NULL((void*)0);
4323 }
4324
4325 /* Only ask if master is down to other sentinels if:
4326 *
4327 * 1) We believe it is down, or there is a failover in progress.
4328 * 2) Sentinel is connected.
4329 * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms. */
4330 if ((master->flags & SRI_S_DOWN(1<<3)) == 0) continue;
4331 if (ri->link->disconnected) continue;
4332 if (!(flags & SENTINEL_ASK_FORCED(1<<0)) &&
4333 mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD1000)
4334 continue;
4335
4336 /* Ask */
4337 ll2string(port,sizeof(port),master->addr->port);
4338 retval = redisAsyncCommand(ri->link->cc,
4339 sentinelReceiveIsMasterDownReply, ri,
4340 "%s is-master-down-by-addr %s %s %llu %s",
4341 sentinelInstanceMapCommand(ri,"SENTINEL"),
4342 announceSentinelAddr(master->addr), port,
4343 sentinel.current_epoch,
4344 (master->failover_state > SENTINEL_FAILOVER_STATE_NONE0) ?
4345 sentinel.myid : "*");
4346 if (retval == C_OK0) ri->link->pending_commands++;
4347 }
4348 dictReleaseIterator(di);
4349}
4350
4351/* =============================== FAILOVER ================================= */
4352
4353/* Crash because of user request via SENTINEL simulate-failure command. */
4354void sentinelSimFailureCrash(void) {
4355 serverLog(LL_WARNING3,
4356 "Sentinel CRASH because of SENTINEL simulate-failure");
4357 exit(99);
4358}
4359
4360/* Vote for the sentinel with 'req_runid' or return the old vote if already
4361 * voted for the specified 'req_epoch' or one greater.
4362 *
4363 * If a vote is not available returns NULL, otherwise return the Sentinel
4364 * runid and populate the leader_epoch with the epoch of the vote. */
4365char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
4366 if (req_epoch > sentinel.current_epoch) {
4367 sentinel.current_epoch = req_epoch;
4368 sentinelFlushConfig();
4369 sentinelEvent(LL_WARNING3,"+new-epoch",master,"%llu",
4370 (unsigned long long) sentinel.current_epoch);
4371 }
4372
4373 if (master->leader_epoch < req_epoch && sentinel.current_epoch <= req_epoch)
4374 {
4375 sdsfree(master->leader);
4376 master->leader = sdsnew(req_runid);
4377 master->leader_epoch = sentinel.current_epoch;
4378 sentinelFlushConfig();
4379 sentinelEvent(LL_WARNING3,"+vote-for-leader",master,"%s %llu",
4380 master->leader, (unsigned long long) master->leader_epoch);
4381 /* If we did not vote for ourselves, set the master failover start
4382 * time to now, in order to force a delay before we can start a
4383 * failover for the same master. */
4384 if (strcasecmp(master->leader,sentinel.myid))
4385 master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC1000;
4386 }
4387
4388 *leader_epoch = master->leader_epoch;
4389 return master->leader ? sdsnew(master->leader) : NULL((void*)0);
4390}
4391
4392struct sentinelLeader {
4393 char *runid;
4394 unsigned long votes;
4395};
4396
4397/* Helper function for sentinelGetLeader, increment the counter
4398 * relative to the specified runid. */
4399int sentinelLeaderIncr(dict *counters, char *runid) {
4400 dictEntry *existing, *de;
4401 uint64_t oldval;
4402
4403 de = dictAddRaw(counters,runid,&existing);
4404 if (existing) {
4405 oldval = dictGetUnsignedIntegerVal(existing)((existing)->v.u64);
4406 dictSetUnsignedIntegerVal(existing,oldval+1)do { (existing)->v.u64 = oldval+1; } while(0);
4407 return oldval+1;
4408 } else {
4409 serverAssert(de != NULL)((de != ((void*)0))?(void)0 : (_serverAssert("de != NULL","sentinel.c"
,4409),__builtin_unreachable()))
;
4410 dictSetUnsignedIntegerVal(de,1)do { (de)->v.u64 = 1; } while(0);
4411 return 1;
4412 }
4413}
4414
4415/* Scan all the Sentinels attached to this master to check if there
4416 * is a leader for the specified epoch.
4417 *
4418 * To be a leader for a given epoch, we should have the majority of
4419 * the Sentinels we know (ever seen since the last SENTINEL RESET) that
4420 * reported the same instance as leader for the same epoch. */
4421char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
4422 dict *counters;
4423 dictIterator *di;
4424 dictEntry *de;
4425 unsigned int voters = 0, voters_quorum;
4426 char *myvote;
4427 char *winner = NULL((void*)0);
4428 uint64_t leader_epoch;
4429 uint64_t max_votes = 0;
4430
4431 serverAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))((master->flags & ((1<<4)|(1<<6)))?(void)0
: (_serverAssert("master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)"
,"sentinel.c",4431),__builtin_unreachable()))
;
4432 counters = dictCreate(&leaderVotesDictType,NULL((void*)0));
4433
4434 voters = dictSize(master->sentinels)((master->sentinels)->ht[0].used+(master->sentinels)
->ht[1].used)
+1; /* All the other sentinels and me.*/
4435
4436 /* Count other sentinels votes */
4437 di = dictGetIterator(master->sentinels);
4438 while((de = dictNext(di)) != NULL((void*)0)) {
4439 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
4440 if (ri->leader != NULL((void*)0) && ri->leader_epoch == sentinel.current_epoch)
4441 sentinelLeaderIncr(counters,ri->leader);
4442 }
4443 dictReleaseIterator(di);
4444
4445 /* Check what's the winner. For the winner to win, it needs two conditions:
4446 * 1) Absolute majority between voters (50% + 1).
4447 * 2) And anyway at least master->quorum votes. */
4448 di = dictGetIterator(counters);
4449 while((de = dictNext(di)) != NULL((void*)0)) {
4450 uint64_t votes = dictGetUnsignedIntegerVal(de)((de)->v.u64);
4451
4452 if (votes > max_votes) {
4453 max_votes = votes;
4454 winner = dictGetKey(de)((de)->key);
4455 }
4456 }
4457 dictReleaseIterator(di);
4458
4459 /* Count this Sentinel vote:
4460 * if this Sentinel did not vote yet, either vote for the most
4461 * common voted sentinel, or for itself if no vote exists at all. */
4462 if (winner)
4463 myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
4464 else
4465 myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);
4466
4467 if (myvote && leader_epoch == epoch) {
4468 uint64_t votes = sentinelLeaderIncr(counters,myvote);
4469
4470 if (votes > max_votes) {
4471 max_votes = votes;
4472 winner = myvote;
4473 }
4474 }
4475
4476 voters_quorum = voters/2+1;
4477 if (winner && (max_votes < voters_quorum || max_votes < master->quorum))
4478 winner = NULL((void*)0);
4479
4480 winner = winner ? sdsnew(winner) : NULL((void*)0);
4481 sdsfree(myvote);
4482 dictRelease(counters);
4483 return winner;
4484}
4485
4486/* Send SLAVEOF to the specified instance, always followed by a
4487 * CONFIG REWRITE command in order to store the new configuration on disk
4488 * when possible (that is, if the Redis instance is recent enough to support
4489 * config rewriting, and if the server was started with a configuration file).
4490 *
4491 * If 'addr' is NULL the function sends "SLAVEOF NO ONE".
4492 *
4493 * The command returns C_OK if the SLAVEOF command was accepted for
4494 * (later) delivery otherwise C_ERR. The command replies are just
4495 * discarded. */
4496int sentinelSendSlaveOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
4497 char portstr[32];
4498 const char *host;
4499 int retval;
4500
4501 /* If addr is NULL we send SLAVEOF NO ONE that will turn the instance
4502 * into a master. */
4503 if (!addr) {
4504 host = "NO";
4505 memcpy(portstr,"ONE",4);
4506 } else {
4507 host = announceSentinelAddr(addr);
4508 ll2string(portstr,sizeof(portstr),addr->port);
4509 }
4510
4511 /* In order to send SLAVEOF in a safe way, we send a transaction performing
4512 * the following tasks:
4513 * 1) Reconfigure the instance according to the specified host/port params.
4514 * 2) Rewrite the configuration.
4515 * 3) Disconnect all clients (but this one sending the command) in order
4516 * to trigger the ask-master-on-reconnection protocol for connected
4517 * clients.
4518 *
4519 * Note that we don't check the replies returned by commands, since we
4520 * will observe instead the effects in the next INFO output. */
4521 retval = redisAsyncCommand(ri->link->cc,
4522 sentinelDiscardReplyCallback, ri, "%s",
4523 sentinelInstanceMapCommand(ri,"MULTI"));
4524 if (retval == C_ERR-1) return retval;
4525 ri->link->pending_commands++;
4526
4527 retval = redisAsyncCommand(ri->link->cc,
4528 sentinelDiscardReplyCallback, ri, "%s %s %s",
4529 sentinelInstanceMapCommand(ri,"SLAVEOF"),
4530 host, portstr);
4531 if (retval == C_ERR-1) return retval;
4532 ri->link->pending_commands++;
4533
4534 retval = redisAsyncCommand(ri->link->cc,
4535 sentinelDiscardReplyCallback, ri, "%s REWRITE",
4536 sentinelInstanceMapCommand(ri,"CONFIG"));
4537 if (retval == C_ERR-1) return retval;
4538 ri->link->pending_commands++;
4539
4540 /* CLIENT KILL TYPE <type> is only supported starting from Redis 2.8.12,
4541 * however sending it to an instance not understanding this command is not
4542 * an issue because CLIENT is a variadic command, so Redis will not
4543 * recognize it as a syntax error, and the transaction will not fail (but
4544 * only the unsupported command will fail). */
4545 for (int type = 0; type < 2; type++) {
4546 retval = redisAsyncCommand(ri->link->cc,
4547 sentinelDiscardReplyCallback, ri, "%s KILL TYPE %s",
4548 sentinelInstanceMapCommand(ri,"CLIENT"),
4549 type == 0 ? "normal" : "pubsub");
4550 if (retval == C_ERR-1) return retval;
4551 ri->link->pending_commands++;
4552 }
4553
4554 retval = redisAsyncCommand(ri->link->cc,
4555 sentinelDiscardReplyCallback, ri, "%s",
4556 sentinelInstanceMapCommand(ri,"EXEC"));
4557 if (retval == C_ERR-1) return retval;
4558 ri->link->pending_commands++;
4559
4560 return C_OK0;
4561}
4562
4563/* Setup the master state to start a failover. */
4564void sentinelStartFailover(sentinelRedisInstance *master) {
4565 serverAssert(master->flags & SRI_MASTER)((master->flags & (1<<0))?(void)0 : (_serverAssert
("master->flags & SRI_MASTER","sentinel.c",4565),__builtin_unreachable
()))
;
4566
4567 master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START1;
4568 master->flags |= SRI_FAILOVER_IN_PROGRESS(1<<6);
4569 master->failover_epoch = ++sentinel.current_epoch;
4570 sentinelEvent(LL_WARNING3,"+new-epoch",master,"%llu",
4571 (unsigned long long) sentinel.current_epoch);
4572 sentinelEvent(LL_WARNING3,"+try-failover",master,"%@");
4573 master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC1000;
4574 master->failover_state_change_time = mstime();
4575}
4576
4577/* This function checks if there are the conditions to start the failover,
4578 * that is:
4579 *
4580 * 1) Master must be in ODOWN condition.
4581 * 2) No failover already in progress.
4582 * 3) No failover already attempted recently.
4583 *
4584 * We still don't know if we'll win the election so it is possible that we
4585 * start the failover but that we'll not be able to act.
4586 *
4587 * Return non-zero if a failover was started. */
4588int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
4589 /* We can't failover if the master is not in O_DOWN state. */
4590 if (!(master->flags & SRI_O_DOWN(1<<4))) return 0;
4591
4592 /* Failover already in progress? */
4593 if (master->flags & SRI_FAILOVER_IN_PROGRESS(1<<6)) return 0;
4594
4595 /* Last failover attempt started too little time ago? */
4596 if (mstime() - master->failover_start_time <
4597 master->failover_timeout*2)
4598 {
4599 if (master->failover_delay_logged != master->failover_start_time) {
4600 time_t clock = (master->failover_start_time +
4601 master->failover_timeout*2) / 1000;
4602 char ctimebuf[26];
4603
4604 ctime_r(&clock,ctimebuf);
4605 ctimebuf[24] = '\0'; /* Remove newline. */
4606 master->failover_delay_logged = master->failover_start_time;
4607 serverLog(LL_WARNING3,
4608 "Next failover delay: I will not start a failover before %s",
4609 ctimebuf);
4610 }
4611 return 0;
4612 }
4613
4614 sentinelStartFailover(master);
4615 return 1;
4616}
4617
4618/* Select a suitable slave to promote. The current algorithm only uses
4619 * the following parameters:
4620 *
4621 * 1) None of the following conditions: S_DOWN, O_DOWN, DISCONNECTED.
4622 * 2) Last time the slave replied to ping no more than 5 times the PING period.
4623 * 3) info_refresh not older than 3 times the INFO refresh period.
4624 * 4) master_link_down_time no more than:
4625 * (now - master->s_down_since_time) + (master->down_after_period * 10).
4626 * Basically since the master is down from our POV, the slave reports
4627 * to be disconnected no more than 10 times the configured down-after-period.
4628 * This is pretty much black magic but the idea is, the master was not
4629 * available so the slave may be lagging, but not over a certain time.
4630 * Anyway we'll select the best slave according to replication offset.
4631 * 5) Slave priority can't be zero, otherwise the slave is discarded.
4632 *
4633 * Among all the slaves matching the above conditions we select the slave
4634 * with, in order of sorting key:
4635 *
4636 * - lower slave_priority.
4637 * - bigger processed replication offset.
4638 * - lexicographically smaller runid.
4639 *
4640 * Basically if priority is the same, the slave that processed more commands
4641 * from the master is selected.
4642 *
4643 * The function returns the pointer to the selected slave, otherwise
4644 * NULL if no suitable slave was found.
4645 */
4646
4647/* Helper for sentinelSelectSlave(). This is used by qsort() in order to
4648 * sort suitable slaves in a "better first" order, to take the first of
4649 * the list. */
4650int compareSlavesForPromotion(const void *a, const void *b) {
4651 sentinelRedisInstance **sa = (sentinelRedisInstance **)a,
4652 **sb = (sentinelRedisInstance **)b;
4653 char *sa_runid, *sb_runid;
4654
4655 if ((*sa)->slave_priority != (*sb)->slave_priority)
4656 return (*sa)->slave_priority - (*sb)->slave_priority;
4657
4658 /* If priority is the same, select the slave with greater replication
4659 * offset (processed more data from the master). */
4660 if ((*sa)->slave_repl_offset > (*sb)->slave_repl_offset) {
4661 return -1; /* a < b */
4662 } else if ((*sa)->slave_repl_offset < (*sb)->slave_repl_offset) {
4663 return 1; /* a > b */
4664 }
4665
4666 /* If the replication offset is the same select the slave that has
4667 * the lexicographically smaller runid. Note that we try to handle runid
4668 * == NULL as there are old Redis versions that don't publish runid in
4669 * INFO. A NULL runid is considered bigger than any other runid. */
4670 sa_runid = (*sa)->runid;
4671 sb_runid = (*sb)->runid;
4672 if (sa_runid == NULL((void*)0) && sb_runid == NULL((void*)0)) return 0;
4673 else if (sa_runid == NULL((void*)0)) return 1; /* a > b */
4674 else if (sb_runid == NULL((void*)0)) return -1; /* a < b */
4675 return strcasecmp(sa_runid, sb_runid);
4676}
4677
4678sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
4679 sentinelRedisInstance **instance =
4680 zmalloc(sizeof(instance[0])*dictSize(master->slaves)((master->slaves)->ht[0].used+(master->slaves)->ht
[1].used)
);
4681 sentinelRedisInstance *selected = NULL((void*)0);
4682 int instances = 0;
4683 dictIterator *di;
4684 dictEntry *de;
4685 mstime_t max_master_down_time = 0;
4686
4687 if (master->flags & SRI_S_DOWN(1<<3))
4688 max_master_down_time += mstime() - master->s_down_since_time;
4689 max_master_down_time += master->down_after_period * 10;
4690
4691 di = dictGetIterator(master->slaves);
4692 while((de = dictNext(di)) != NULL((void*)0)) {
4693 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
4694 mstime_t info_validity_time;
4695
4696 if (slave->flags & (SRI_S_DOWN(1<<3)|SRI_O_DOWN(1<<4))) continue;
4697 if (slave->link->disconnected) continue;
4698 if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD1000*5) continue;
4699 if (slave->slave_priority == 0) continue;
4700
4701 /* If the master is in SDOWN state we get INFO for slaves every second.
4702 * Otherwise we get it with the usual period so we need to account for
4703 * a larger delay. */
4704 if (master->flags & SRI_S_DOWN(1<<3))
4705 info_validity_time = SENTINEL_PING_PERIOD1000*5;
4706 else
4707 info_validity_time = SENTINEL_INFO_PERIOD10000*3;
4708 if (mstime() - slave->info_refresh > info_validity_time) continue;
4709 if (slave->master_link_down_time > max_master_down_time) continue;
4710 instance[instances++] = slave;
4711 }
4712 dictReleaseIterator(di);
4713 if (instances) {
4714 qsort(instance,instances,sizeof(sentinelRedisInstance*),
4715 compareSlavesForPromotion);
4716 selected = instance[0];
4717 }
4718 zfree(instance);
4719 return selected;
4720}
4721
4722/* ---------------- Failover state machine implementation ------------------- */
4723void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
4724 char *leader;
4725 int isleader;
4726
4727 /* Check if we are the leader for the failover epoch. */
4728 leader = sentinelGetLeader(ri, ri->failover_epoch);
4729 isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
4730 sdsfree(leader);
4731
4732 /* If I'm not the leader, and it is not a forced failover via
4733 * SENTINEL FAILOVER, then I can't continue with the failover. */
4734 if (!isleader && !(ri->flags & SRI_FORCE_FAILOVER(1<<11))) {
4735 int election_timeout = SENTINEL_ELECTION_TIMEOUT10000;
4736
4737 /* The election timeout is the MIN between SENTINEL_ELECTION_TIMEOUT
4738 * and the configured failover timeout. */
4739 if (election_timeout > ri->failover_timeout)
4740 election_timeout = ri->failover_timeout;
4741 /* Abort the failover if I'm not the leader after some time. */
4742 if (mstime() - ri->failover_start_time > election_timeout) {
4743 sentinelEvent(LL_WARNING3,"-failover-abort-not-elected",ri,"%@");
4744 sentinelAbortFailover(ri);
4745 }
4746 return;
4747 }
4748 sentinelEvent(LL_WARNING3,"+elected-leader",ri,"%@");
4749 if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION(1<<0))
4750 sentinelSimFailureCrash();
4751 ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE2;
4752 ri->failover_state_change_time = mstime();
4753 sentinelEvent(LL_WARNING3,"+failover-state-select-slave",ri,"%@");
4754}
4755
4756void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
4757 sentinelRedisInstance *slave = sentinelSelectSlave(ri);
4758
4759 /* We don't handle the timeout in this state as the function aborts
4760 * the failover or go forward in the next state. */
4761 if (slave == NULL((void*)0)) {
4762 sentinelEvent(LL_WARNING3,"-failover-abort-no-good-slave",ri,"%@");
4763 sentinelAbortFailover(ri);
4764 } else {
4765 sentinelEvent(LL_WARNING3,"+selected-slave",slave,"%@");
4766 slave->flags |= SRI_PROMOTED(1<<7);
4767 ri->promoted_slave = slave;
4768 ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE3;
4769 ri->failover_state_change_time = mstime();
4770 sentinelEvent(LL_NOTICE2,"+failover-state-send-slaveof-noone",
4771 slave, "%@");
4772 }
4773}
4774
4775void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
4776 int retval;
4777
4778 /* We can't send the command to the promoted slave if it is now
4779 * disconnected. Retry again and again with this state until the timeout
4780 * is reached, then abort the failover. */
4781 if (ri->promoted_slave->link->disconnected) {
4782 if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
4783 sentinelEvent(LL_WARNING3,"-failover-abort-slave-timeout",ri,"%@");
4784 sentinelAbortFailover(ri);
4785 }
4786 return;
4787 }
4788
4789 /* Send SLAVEOF NO ONE command to turn the slave into a master.
4790 * We actually register a generic callback for this command as we don't
4791 * really care about the reply. We check if it worked indirectly observing
4792 * if INFO returns a different role (master instead of slave). */
4793 retval = sentinelSendSlaveOf(ri->promoted_slave,NULL((void*)0));
4794 if (retval != C_OK0) return;
4795 sentinelEvent(LL_NOTICE2, "+failover-state-wait-promotion",
4796 ri->promoted_slave,"%@");
4797 ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION4;
4798 ri->failover_state_change_time = mstime();
4799}
4800
4801/* We actually wait for promotion indirectly checking with INFO when the
4802 * slave turns into a master. */
4803void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
4804 /* Just handle the timeout. Switching to the next state is handled
4805 * by the function parsing the INFO command of the promoted slave. */
4806 if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
4807 sentinelEvent(LL_WARNING3,"-failover-abort-slave-timeout",ri,"%@");
4808 sentinelAbortFailover(ri);
4809 }
4810}
4811
4812void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
4813 int not_reconfigured = 0, timeout = 0;
4814 dictIterator *di;
4815 dictEntry *de;
4816 mstime_t elapsed = mstime() - master->failover_state_change_time;
4817
4818 /* We can't consider failover finished if the promoted slave is
4819 * not reachable. */
4820 if (master->promoted_slave == NULL((void*)0) ||
4821 master->promoted_slave->flags & SRI_S_DOWN(1<<3)) return;
4822
4823 /* The failover terminates once all the reachable slaves are properly
4824 * configured. */
4825 di = dictGetIterator(master->slaves);
4826 while((de = dictNext(di)) != NULL((void*)0)) {
4827 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
4828
4829 if (slave->flags & (SRI_PROMOTED(1<<7)|SRI_RECONF_DONE(1<<10))) continue;
4830 if (slave->flags & SRI_S_DOWN(1<<3)) continue;
4831 not_reconfigured++;
4832 }
4833 dictReleaseIterator(di);
4834
4835 /* Force end of failover on timeout. */
4836 if (elapsed > master->failover_timeout) {
4837 not_reconfigured = 0;
4838 timeout = 1;
4839 sentinelEvent(LL_WARNING3,"+failover-end-for-timeout",master,"%@");
4840 }
4841
4842 if (not_reconfigured == 0) {
4843 sentinelEvent(LL_WARNING3,"+failover-end",master,"%@");
4844 master->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG6;
4845 master->failover_state_change_time = mstime();
4846 }
4847
4848 /* If I'm the leader it is a good idea to send a best effort SLAVEOF
4849 * command to all the slaves still not reconfigured to replicate with
4850 * the new master. */
4851 if (timeout) {
4852 dictIterator *di;
4853 dictEntry *de;
4854
4855 di = dictGetIterator(master->slaves);
4856 while((de = dictNext(di)) != NULL((void*)0)) {
4857 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
4858 int retval;
4859
4860 if (slave->flags & (SRI_PROMOTED(1<<7)|SRI_RECONF_DONE(1<<10)|SRI_RECONF_SENT(1<<8))) continue;
4861 if (slave->link->disconnected) continue;
4862
4863 retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
4864 if (retval == C_OK0) {
4865 sentinelEvent(LL_NOTICE2,"+slave-reconf-sent-be",slave,"%@");
4866 slave->flags |= SRI_RECONF_SENT(1<<8);
4867 }
4868 }
4869 dictReleaseIterator(di);
4870 }
4871}
4872
4873/* Send SLAVE OF <new master address> to all the remaining slaves that
4874 * still don't appear to have the configuration updated. */
4875void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
4876 dictIterator *di;
4877 dictEntry *de;
4878 int in_progress = 0;
4879
4880 di = dictGetIterator(master->slaves);
4881 while((de = dictNext(di)) != NULL((void*)0)) {
4882 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
4883
4884 if (slave->flags & (SRI_RECONF_SENT(1<<8)|SRI_RECONF_INPROG(1<<9)))
4885 in_progress++;
4886 }
4887 dictReleaseIterator(di);
4888
4889 di = dictGetIterator(master->slaves);
4890 while(in_progress < master->parallel_syncs &&
4891 (de = dictNext(di)) != NULL((void*)0))
4892 {
4893 sentinelRedisInstance *slave = dictGetVal(de)((de)->v.val);
4894 int retval;
4895
4896 /* Skip the promoted slave, and already configured slaves. */
4897 if (slave->flags & (SRI_PROMOTED(1<<7)|SRI_RECONF_DONE(1<<10))) continue;
4898
4899 /* If too much time elapsed without the slave moving forward to
4900 * the next state, consider it reconfigured even if it is not.
4901 * Sentinels will detect the slave as misconfigured and fix its
4902 * configuration later. */
4903 if ((slave->flags & SRI_RECONF_SENT(1<<8)) &&
4904 (mstime() - slave->slave_reconf_sent_time) >
4905 SENTINEL_SLAVE_RECONF_TIMEOUT10000)
4906 {
4907 sentinelEvent(LL_NOTICE2,"-slave-reconf-sent-timeout",slave,"%@");
4908 slave->flags &= ~SRI_RECONF_SENT(1<<8);
4909 slave->flags |= SRI_RECONF_DONE(1<<10);
4910 }
4911
4912 /* Nothing to do for instances that are disconnected or already
4913 * in RECONF_SENT state. */
4914 if (slave->flags & (SRI_RECONF_SENT(1<<8)|SRI_RECONF_INPROG(1<<9))) continue;
4915 if (slave->link->disconnected) continue;
4916
4917 /* Send SLAVEOF <new master>. */
4918 retval = sentinelSendSlaveOf(slave,master->promoted_slave->addr);
4919 if (retval == C_OK0) {
4920 slave->flags |= SRI_RECONF_SENT(1<<8);
4921 slave->slave_reconf_sent_time = mstime();
4922 sentinelEvent(LL_NOTICE2,"+slave-reconf-sent",slave,"%@");
4923 in_progress++;
4924 }
4925 }
4926 dictReleaseIterator(di);
4927
4928 /* Check if all the slaves are reconfigured and handle timeout. */
4929 sentinelFailoverDetectEnd(master);
4930}
4931
4932/* This function is called when the slave is in
4933 * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG state. In this state we need
4934 * to remove it from the master table and add the promoted slave instead. */
4935void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
4936 sentinelRedisInstance *ref = master->promoted_slave ?
4937 master->promoted_slave : master;
4938
4939 sentinelEvent(LL_WARNING3,"+switch-master",master,"%s %s %d %s %d",
4940 master->name, announceSentinelAddr(master->addr), master->addr->port,
4941 announceSentinelAddr(ref->addr), ref->addr->port);
4942
4943 sentinelResetMasterAndChangeAddress(master,ref->addr->hostname,ref->addr->port);
4944}
4945
4946void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
4947 serverAssert(ri->flags & SRI_MASTER)((ri->flags & (1<<0))?(void)0 : (_serverAssert("ri->flags & SRI_MASTER"
,"sentinel.c",4947),__builtin_unreachable()))
;
4948
4949 if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS(1<<6))) return;
4950
4951 switch(ri->failover_state) {
4952 case SENTINEL_FAILOVER_STATE_WAIT_START1:
4953 sentinelFailoverWaitStart(ri);
4954 break;
4955 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE2:
4956 sentinelFailoverSelectSlave(ri);
4957 break;
4958 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE3:
4959 sentinelFailoverSendSlaveOfNoOne(ri);
4960 break;
4961 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION4:
4962 sentinelFailoverWaitPromotion(ri);
4963 break;
4964 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES5:
4965 sentinelFailoverReconfNextSlave(ri);
4966 break;
4967 }
4968}
4969
4970/* Abort a failover in progress:
4971 *
4972 * This function can only be called before the promoted slave acknowledged
4973 * the slave -> master switch. Otherwise the failover can't be aborted and
4974 * will reach its end (possibly by timeout). */
4975void sentinelAbortFailover(sentinelRedisInstance *ri) {
4976 serverAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS)((ri->flags & (1<<6))?(void)0 : (_serverAssert("ri->flags & SRI_FAILOVER_IN_PROGRESS"
,"sentinel.c",4976),__builtin_unreachable()))
;
4977 serverAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION)((ri->failover_state <= 4)?(void)0 : (_serverAssert("ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION"
,"sentinel.c",4977),__builtin_unreachable()))
;
4978
4979 ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS(1<<6)|SRI_FORCE_FAILOVER(1<<11));
4980 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE0;
4981 ri->failover_state_change_time = mstime();
4982 if (ri->promoted_slave) {
4983 ri->promoted_slave->flags &= ~SRI_PROMOTED(1<<7);
4984 ri->promoted_slave = NULL((void*)0);
4985 }
4986}
4987
4988/* ======================== SENTINEL timer handler ==========================
4989 * This is the "main" of our Sentinel, as Sentinel is completely non blocking
4990 * in design. The function is called every second.
4991 * -------------------------------------------------------------------------- */
4992
4993/* Perform scheduled operations for the specified Redis instance. */
4994void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
4995 /* ========== MONITORING HALF ============ */
4996 /* Every kind of instance */
4997 sentinelReconnectInstance(ri);
4998 sentinelSendPeriodicCommands(ri);
4999
5000 /* ============== ACTING HALF ============= */
5001 /* We don't proceed with the acting half if we are in TILT mode.
5002 * TILT happens when we find something odd with the time, like a
5003 * sudden change in the clock. */
5004 if (sentinel.tilt) {
5005 if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD(1000*30)) return;
5006 sentinel.tilt = 0;
5007 sentinelEvent(LL_WARNING3,"-tilt",NULL((void*)0),"#tilt mode exited");
5008 }
5009
5010 /* Every kind of instance */
5011 sentinelCheckSubjectivelyDown(ri);
5012
5013 /* Masters and slaves */
5014 if (ri->flags & (SRI_MASTER(1<<0)|SRI_SLAVE(1<<1))) {
5015 /* Nothing so far. */
5016 }
5017
5018 /* Only masters */
5019 if (ri->flags & SRI_MASTER(1<<0)) {
5020 sentinelCheckObjectivelyDown(ri);
5021 if (sentinelStartFailoverIfNeeded(ri))
5022 sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED(1<<0));
5023 sentinelFailoverStateMachine(ri);
5024 sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS0);
5025 }
5026}
5027
5028/* Perform scheduled operations for all the instances in the dictionary.
5029 * Recursively call the function against dictionaries of slaves. */
5030void sentinelHandleDictOfRedisInstances(dict *instances) {
5031 dictIterator *di;
5032 dictEntry *de;
5033 sentinelRedisInstance *switch_to_promoted = NULL((void*)0);
5034
5035 /* There are a number of things we need to perform against every master. */
5036 di = dictGetIterator(instances);
5037 while((de = dictNext(di)) != NULL((void*)0)) {
5038 sentinelRedisInstance *ri = dictGetVal(de)((de)->v.val);
5039
5040 sentinelHandleRedisInstance(ri);
5041 if (ri->flags & SRI_MASTER(1<<0)) {
5042 sentinelHandleDictOfRedisInstances(ri->slaves);
5043 sentinelHandleDictOfRedisInstances(ri->sentinels);
5044 if (ri->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG6) {
5045 switch_to_promoted = ri;
5046 }
5047 }
5048 }
5049 if (switch_to_promoted)
5050 sentinelFailoverSwitchToPromotedSlave(switch_to_promoted);
5051 dictReleaseIterator(di);
5052}
5053
5054/* This function checks if we need to enter the TITL mode.
5055 *
5056 * The TILT mode is entered if we detect that between two invocations of the
5057 * timer interrupt, a negative amount of time, or too much time has passed.
5058 * Note that we expect that more or less just 100 milliseconds will pass
5059 * if everything is fine. However we'll see a negative number or a
5060 * difference bigger than SENTINEL_TILT_TRIGGER milliseconds if one of the
5061 * following conditions happen:
5062 *
5063 * 1) The Sentinel process for some time is blocked, for every kind of
5064 * random reason: the load is huge, the computer was frozen for some time
5065 * in I/O or alike, the process was stopped by a signal. Everything.
5066 * 2) The system clock was altered significantly.
5067 *
5068 * Under both this conditions we'll see everything as timed out and failing
5069 * without good reasons. Instead we enter the TILT mode and wait
5070 * for SENTINEL_TILT_PERIOD to elapse before starting to act again.
5071 *
5072 * During TILT time we still collect information, we just do not act. */
5073void sentinelCheckTiltCondition(void) {
5074 mstime_t now = mstime();
5075 mstime_t delta = now - sentinel.previous_time;
5076
5077 if (delta < 0 || delta > SENTINEL_TILT_TRIGGER2000) {
5078 sentinel.tilt = 1;
5079 sentinel.tilt_start_time = mstime();
5080 sentinelEvent(LL_WARNING3,"+tilt",NULL((void*)0),"#tilt mode entered");
5081 }
5082 sentinel.previous_time = mstime();
5083}
5084
5085void sentinelTimer(void) {
5086 sentinelCheckTiltCondition();
5087 sentinelHandleDictOfRedisInstances(sentinel.masters);
5088 sentinelRunPendingScripts();
5089 sentinelCollectTerminatedScripts();
5090 sentinelKillTimedoutScripts();
5091
5092 /* We continuously change the frequency of the Redis "timer interrupt"
5093 * in order to desynchronize every Sentinel from every other.
5094 * This non-determinism avoids that Sentinels started at the same time
5095 * exactly continue to stay synchronized asking to be voted at the
5096 * same time again and again (resulting in nobody likely winning the
5097 * election because of split brain voting). */
5098 server.hz = CONFIG_DEFAULT_HZ10 + rand() % CONFIG_DEFAULT_HZ10;
5099}