提交 e2641e09 编写于 作者: A antirez

redis.c split into many different C files.

networking related stuff moved into networking.c

moved more code

more work on layout of source code

SDS instantaneuos memory saving. By Pieter and Salvatore at VMware ;)

cleanly compiling again after the first split, now splitting it in more C files

moving more things around... work in progress

split replication code

splitting more

Sets split

Hash split

replication split

even more splitting

more splitting

minor change
上级 c2ff0e90
此差异已折叠。
/*
* Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __REDIS_H__
#define __REDIS_H__
enum
{
REG_GS = 0,
# define REG_GS REG_GS
REG_FS,
# define REG_FS REG_FS
REG_ES,
# define REG_ES REG_ES
REG_DS,
# define REG_DS REG_DS
REG_EDI,
# define REG_EDI REG_EDI
REG_ESI,
# define REG_ESI REG_ESI
REG_EBP,
# define REG_EBP REG_EBP
REG_ESP,
# define REG_ESP REG_ESP
REG_EBX,
# define REG_EBX REG_EBX
REG_EDX,
# define REG_EDX REG_EDX
REG_ECX,
# define REG_ECX REG_ECX
REG_EAX,
# define REG_EAX REG_EAX
REG_TRAPNO,
# define REG_TRAPNO REG_TRAPNO
REG_ERR,
# define REG_ERR REG_ERR
REG_EIP,
# define REG_EIP REG_EIP
REG_CS,
# define REG_CS REG_CS
REG_EFL,
# define REG_EFL REG_EFL
REG_UESP,
# define REG_UESP REG_UESP
REG_SS
# define REG_SS REG_SS
};
#endif
......@@ -15,7 +15,7 @@ endif
CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF)
DEBUG?= -g -rdynamic -ggdb
OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o
OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o
BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o
CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o
CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o
......@@ -48,7 +48,7 @@ redis-check-dump.o: redis-check-dump.c lzf.h
redis-cli.o: redis-cli.c fmacros.h anet.h sds.h adlist.h zmalloc.h \
linenoise.h
redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \
adlist.h zmalloc.h lzf.h pqsort.h zipmap.h ziplist.h sha1.h staticsymbols.h
adlist.h zmalloc.h lzf.h pqsort.h zipmap.h ziplist.h sha1.h
release.o: release.c release.h
sds.o: sds.c sds.h zmalloc.h
sha1.o: sha1.c sha1.h
......@@ -83,11 +83,8 @@ clean:
dep:
$(CC) -MM *.c
staticsymbols:
tclsh utils/build-static-symbols.tcl > staticsymbols.h
test:
tclsh8.5 tests/test_helper.tcl --tags "${TAGS}"
(cd ..; tclsh8.5 tests/test_helper.tcl --tags "${TAGS}")
bench:
./redis-benchmark
......
此差异已折叠。
#include "redis.h"
/*-----------------------------------------------------------------------------
* Config file parsing
*----------------------------------------------------------------------------*/
int yesnotoi(char *s) {
if (!strcasecmp(s,"yes")) return 1;
else if (!strcasecmp(s,"no")) return 0;
else return -1;
}
void appendServerSaveParams(time_t seconds, int changes) {
server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
server.saveparams[server.saveparamslen].seconds = seconds;
server.saveparams[server.saveparamslen].changes = changes;
server.saveparamslen++;
}
void resetServerSaveParams() {
zfree(server.saveparams);
server.saveparams = NULL;
server.saveparamslen = 0;
}
/* I agree, this is a very rudimental way to load a configuration...
will improve later if the config gets more complex */
void loadServerConfig(char *filename) {
FILE *fp;
char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
int linenum = 0;
sds line = NULL;
if (filename[0] == '-' && filename[1] == '\0')
fp = stdin;
else {
if ((fp = fopen(filename,"r")) == NULL) {
redisLog(REDIS_WARNING, "Fatal error, can't open config file '%s'", filename);
exit(1);
}
}
while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
sds *argv;
int argc, j;
linenum++;
line = sdsnew(buf);
line = sdstrim(line," \t\r\n");
/* Skip comments and blank lines*/
if (line[0] == '#' || line[0] == '\0') {
sdsfree(line);
continue;
}
/* Split into arguments */
argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
sdstolower(argv[0]);
/* Execute config directives */
if (!strcasecmp(argv[0],"timeout") && argc == 2) {
server.maxidletime = atoi(argv[1]);
if (server.maxidletime < 0) {
err = "Invalid timeout value"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"port") && argc == 2) {
server.port = atoi(argv[1]);
if (server.port < 1 || server.port > 65535) {
err = "Invalid port"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"bind") && argc == 2) {
server.bindaddr = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"save") && argc == 3) {
int seconds = atoi(argv[1]);
int changes = atoi(argv[2]);
if (seconds < 1 || changes < 0) {
err = "Invalid save parameters"; goto loaderr;
}
appendServerSaveParams(seconds,changes);
} else if (!strcasecmp(argv[0],"dir") && argc == 2) {
if (chdir(argv[1]) == -1) {
redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
argv[1], strerror(errno));
exit(1);
}
} else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
else {
err = "Invalid log level. Must be one of debug, notice, warning";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
FILE *logfp;
server.logfile = zstrdup(argv[1]);
if (!strcasecmp(server.logfile,"stdout")) {
zfree(server.logfile);
server.logfile = NULL;
}
if (server.logfile) {
/* Test if we are able to open the file. The server will not
* be able to abort just for this problem later... */
logfp = fopen(server.logfile,"a");
if (logfp == NULL) {
err = sdscatprintf(sdsempty(),
"Can't open the log file: %s", strerror(errno));
goto loaderr;
}
fclose(logfp);
}
} else if (!strcasecmp(argv[0],"databases") && argc == 2) {
server.dbnum = atoi(argv[1]);
if (server.dbnum < 1) {
err = "Invalid number of databases"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"include") && argc == 2) {
loadServerConfig(argv[1]);
} else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
server.maxclients = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
server.maxmemory = memtoll(argv[1],NULL);
} else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
server.masterhost = sdsnew(argv[1]);
server.masterport = atoi(argv[2]);
server.replstate = REDIS_REPL_CONNECT;
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
server.masterauth = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) {
if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
if ((server.appendonly = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
zfree(server.appendfilename);
server.appendfilename = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite")
&& argc == 2) {
if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
if (!strcasecmp(argv[1],"no")) {
server.appendfsync = APPENDFSYNC_NO;
} else if (!strcasecmp(argv[1],"always")) {
server.appendfsync = APPENDFSYNC_ALWAYS;
} else if (!strcasecmp(argv[1],"everysec")) {
server.appendfsync = APPENDFSYNC_EVERYSEC;
} else {
err = "argument must be 'no', 'always' or 'everysec'";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
server.requirepass = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
zfree(server.pidfile);
server.pidfile = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
zfree(server.dbfilename);
server.dbfilename = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"vm-swap-file") && argc == 2) {
zfree(server.vm_swap_file);
server.vm_swap_file = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
server.vm_max_memory = memtoll(argv[1],NULL);
} else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
server.vm_page_size = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
server.vm_pages = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
server.vm_max_threads = strtoll(argv[1], NULL, 10);
} else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2){
server.hash_max_zipmap_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2){
server.hash_max_zipmap_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
server.list_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2){
server.list_max_ziplist_value = memtoll(argv[1], NULL);
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
for (j = 0; j < argc; j++)
sdsfree(argv[j]);
zfree(argv);
sdsfree(line);
}
if (fp != stdin) fclose(fp);
return;
loaderr:
fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
fprintf(stderr, ">>> '%s'\n", line);
fprintf(stderr, "%s\n", err);
exit(1);
}
/*-----------------------------------------------------------------------------
* CONFIG command for remote configuration
*----------------------------------------------------------------------------*/
void configSetCommand(redisClient *c) {
robj *o = getDecodedObject(c->argv[3]);
long long ll;
if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
zfree(server.dbfilename);
server.dbfilename = zstrdup(o->ptr);
} else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
zfree(server.requirepass);
server.requirepass = zstrdup(o->ptr);
} else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
zfree(server.masterauth);
server.masterauth = zstrdup(o->ptr);
} else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
ll < 0) goto badfmt;
server.maxmemory = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
ll < 0 || ll > LONG_MAX) goto badfmt;
server.maxidletime = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
if (!strcasecmp(o->ptr,"no")) {
server.appendfsync = APPENDFSYNC_NO;
} else if (!strcasecmp(o->ptr,"everysec")) {
server.appendfsync = APPENDFSYNC_EVERYSEC;
} else if (!strcasecmp(o->ptr,"always")) {
server.appendfsync = APPENDFSYNC_ALWAYS;
} else {
goto badfmt;
}
} else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
int yn = yesnotoi(o->ptr);
if (yn == -1) goto badfmt;
server.no_appendfsync_on_rewrite = yn;
} else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
int old = server.appendonly;
int new = yesnotoi(o->ptr);
if (new == -1) goto badfmt;
if (old != new) {
if (new == 0) {
stopAppendOnly();
} else {
if (startAppendOnly() == REDIS_ERR) {
addReplySds(c,sdscatprintf(sdsempty(),
"-ERR Unable to turn on AOF. Check server logs.\r\n"));
decrRefCount(o);
return;
}
}
}
} else if (!strcasecmp(c->argv[2]->ptr,"save")) {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
/* Perform sanity check before setting the new config:
* - Even number of args
* - Seconds >= 1, changes >= 0 */
if (vlen & 1) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
for (j = 0; j < vlen; j++) {
char *eptr;
long val;
val = strtoll(v[j], &eptr, 10);
if (eptr[0] != '\0' ||
((j & 1) == 0 && val < 1) ||
((j & 1) == 1 && val < 0)) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
}
/* Finally set the new config */
resetServerSaveParams();
for (j = 0; j < vlen; j += 2) {
time_t seconds;
int changes;
seconds = strtoll(v[j],NULL,10);
changes = strtoll(v[j+1],NULL,10);
appendServerSaveParams(seconds, changes);
}
sdsfreesplitres(v,vlen);
} else {
addReplySds(c,sdscatprintf(sdsempty(),
"-ERR not supported CONFIG parameter %s\r\n",
(char*)c->argv[2]->ptr));
decrRefCount(o);
return;
}
decrRefCount(o);
addReply(c,shared.ok);
return;
badfmt: /* Bad format errors */
addReplySds(c,sdscatprintf(sdsempty(),
"-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
(char*)o->ptr,
(char*)c->argv[2]->ptr));
decrRefCount(o);
}
void configGetCommand(redisClient *c) {
robj *o = getDecodedObject(c->argv[2]);
robj *lenobj = createObject(REDIS_STRING,NULL);
char *pattern = o->ptr;
int matches = 0;
addReply(c,lenobj);
decrRefCount(lenobj);
if (stringmatch(pattern,"dbfilename",0)) {
addReplyBulkCString(c,"dbfilename");
addReplyBulkCString(c,server.dbfilename);
matches++;
}
if (stringmatch(pattern,"requirepass",0)) {
addReplyBulkCString(c,"requirepass");
addReplyBulkCString(c,server.requirepass);
matches++;
}
if (stringmatch(pattern,"masterauth",0)) {
addReplyBulkCString(c,"masterauth");
addReplyBulkCString(c,server.masterauth);
matches++;
}
if (stringmatch(pattern,"maxmemory",0)) {
char buf[128];
ll2string(buf,128,server.maxmemory);
addReplyBulkCString(c,"maxmemory");
addReplyBulkCString(c,buf);
matches++;
}
if (stringmatch(pattern,"timeout",0)) {
char buf[128];
ll2string(buf,128,server.maxidletime);
addReplyBulkCString(c,"timeout");
addReplyBulkCString(c,buf);
matches++;
}
if (stringmatch(pattern,"appendonly",0)) {
addReplyBulkCString(c,"appendonly");
addReplyBulkCString(c,server.appendonly ? "yes" : "no");
matches++;
}
if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) {
addReplyBulkCString(c,"no-appendfsync-on-rewrite");
addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? "yes" : "no");
matches++;
}
if (stringmatch(pattern,"appendfsync",0)) {
char *policy;
switch(server.appendfsync) {
case APPENDFSYNC_NO: policy = "no"; break;
case APPENDFSYNC_EVERYSEC: policy = "everysec"; break;
case APPENDFSYNC_ALWAYS: policy = "always"; break;
default: policy = "unknown"; break; /* too harmless to panic */
}
addReplyBulkCString(c,"appendfsync");
addReplyBulkCString(c,policy);
matches++;
}
if (stringmatch(pattern,"save",0)) {
sds buf = sdsempty();
int j;
for (j = 0; j < server.saveparamslen; j++) {
buf = sdscatprintf(buf,"%ld %d",
server.saveparams[j].seconds,
server.saveparams[j].changes);
if (j != server.saveparamslen-1)
buf = sdscatlen(buf," ",1);
}
addReplyBulkCString(c,"save");
addReplyBulkCString(c,buf);
sdsfree(buf);
matches++;
}
decrRefCount(o);
lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2);
}
void configCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc != 4) goto badarity;
configSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"get")) {
if (c->argc != 3) goto badarity;
configGetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) {
if (c->argc != 2) goto badarity;
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
server.stat_starttime = time(NULL);
addReply(c,shared.ok);
} else {
addReplySds(c,sdscatprintf(sdsempty(),
"-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
}
return;
badarity:
addReplySds(c,sdscatprintf(sdsempty(),
"-ERR Wrong number of arguments for CONFIG %s\r\n",
(char*) c->argv[1]->ptr));
}
#include "redis.h"
#include <signal.h>
/*-----------------------------------------------------------------------------
* C-level DB API
*----------------------------------------------------------------------------*/
robj *lookupKey(redisDb *db, robj *key) {
dictEntry *de = dictFind(db->dict,key->ptr);
if (de) {
robj *val = dictGetEntryVal(de);
if (server.vm_enabled) {
if (val->storage == REDIS_VM_MEMORY ||
val->storage == REDIS_VM_SWAPPING)
{
/* If we were swapping the object out, cancel the operation */
if (val->storage == REDIS_VM_SWAPPING)
vmCancelThreadedIOJob(val);
/* Update the access time for the aging algorithm. */
val->lru = server.lruclock;
} else {
int notify = (val->storage == REDIS_VM_LOADING);
/* Our value was swapped on disk. Bring it at home. */
redisAssert(val->type == REDIS_VMPOINTER);
val = vmLoadObject(val);
dictGetEntryVal(de) = val;
/* Clients blocked by the VM subsystem may be waiting for
* this key... */
if (notify) handleClientsBlockedOnSwappedKey(db,key);
}
}
return val;
} else {
return NULL;
}
}
robj *lookupKeyRead(redisDb *db, robj *key) {
expireIfNeeded(db,key);
return lookupKey(db,key);
}
robj *lookupKeyWrite(redisDb *db, robj *key) {
deleteIfVolatile(db,key);
touchWatchedKey(db,key);
return lookupKey(db,key);
}
robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
robj *o = lookupKeyRead(c->db, key);
if (!o) addReply(c,reply);
return o;
}
robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
robj *o = lookupKeyWrite(c->db, key);
if (!o) addReply(c,reply);
return o;
}
/* Add the key to the DB. If the key already exists REDIS_ERR is returned,
* otherwise REDIS_OK is returned, and the caller should increment the
* refcount of 'val'. */
int dbAdd(redisDb *db, robj *key, robj *val) {
/* Perform a lookup before adding the key, as we need to copy the
* key value. */
if (dictFind(db->dict, key->ptr) != NULL) {
return REDIS_ERR;
} else {
sds copy = sdsdup(key->ptr);
dictAdd(db->dict, copy, val);
return REDIS_OK;
}
}
/* If the key does not exist, this is just like dbAdd(). Otherwise
* the value associated to the key is replaced with the new one.
*
* On update (key already existed) 0 is returned. Otherwise 1. */
int dbReplace(redisDb *db, robj *key, robj *val) {
if (dictFind(db->dict,key->ptr) == NULL) {
sds copy = sdsdup(key->ptr);
dictAdd(db->dict, copy, val);
return 1;
} else {
dictReplace(db->dict, key->ptr, val);
return 0;
}
}
int dbExists(redisDb *db, robj *key) {
return dictFind(db->dict,key->ptr) != NULL;
}
/* Return a random key, in form of a Redis object.
* If there are no keys, NULL is returned.
*
* The function makes sure to return keys not already expired. */
robj *dbRandomKey(redisDb *db) {
struct dictEntry *de;
while(1) {
sds key;
robj *keyobj;
de = dictGetRandomKey(db->dict);
if (de == NULL) return NULL;
key = dictGetEntryKey(de);
keyobj = createStringObject(key,sdslen(key));
if (dictFind(db->expires,key)) {
if (expireIfNeeded(db,keyobj)) {
decrRefCount(keyobj);
continue; /* search for another key. This expired. */
}
}
return keyobj;
}
}
/* Delete a key, value, and associated expiration entry if any, from the DB */
int dbDelete(redisDb *db, robj *key) {
/* Deleting an entry from the expires dict will not free the sds of
* the key, because it is shared with the main dictionary. */
if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
return dictDelete(db->dict,key->ptr) == DICT_OK;
}
/* Empty the whole database */
long long emptyDb() {
int j;
long long removed = 0;
for (j = 0; j < server.dbnum; j++) {
removed += dictSize(server.db[j].dict);
dictEmpty(server.db[j].dict);
dictEmpty(server.db[j].expires);
}
return removed;
}
int selectDb(redisClient *c, int id) {
if (id < 0 || id >= server.dbnum)
return REDIS_ERR;
c->db = &server.db[id];
return REDIS_OK;
}
/*-----------------------------------------------------------------------------
* Type agnostic commands operating on the key space
*----------------------------------------------------------------------------*/
void flushdbCommand(redisClient *c) {
server.dirty += dictSize(c->db->dict);
touchWatchedKeysOnFlush(c->db->id);
dictEmpty(c->db->dict);
dictEmpty(c->db->expires);
addReply(c,shared.ok);
}
void flushallCommand(redisClient *c) {
touchWatchedKeysOnFlush(-1);
server.dirty += emptyDb();
addReply(c,shared.ok);
if (server.bgsavechildpid != -1) {
kill(server.bgsavechildpid,SIGKILL);
rdbRemoveTempFile(server.bgsavechildpid);
}
rdbSave(server.dbfilename);
server.dirty++;
}
void delCommand(redisClient *c) {
int deleted = 0, j;
for (j = 1; j < c->argc; j++) {
if (dbDelete(c->db,c->argv[j])) {
touchWatchedKey(c->db,c->argv[j]);
server.dirty++;
deleted++;
}
}
addReplyLongLong(c,deleted);
}
void existsCommand(redisClient *c) {
expireIfNeeded(c->db,c->argv[1]);
if (dbExists(c->db,c->argv[1])) {
addReply(c, shared.cone);
} else {
addReply(c, shared.czero);
}
}
void selectCommand(redisClient *c) {
int id = atoi(c->argv[1]->ptr);
if (selectDb(c,id) == REDIS_ERR) {
addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
} else {
addReply(c,shared.ok);
}
}
void randomkeyCommand(redisClient *c) {
robj *key;
if ((key = dbRandomKey(c->db)) == NULL) {
addReply(c,shared.nullbulk);
return;
}
addReplyBulk(c,key);
decrRefCount(key);
}
void keysCommand(redisClient *c) {
dictIterator *di;
dictEntry *de;
sds pattern = c->argv[1]->ptr;
int plen = sdslen(pattern);
unsigned long numkeys = 0;
robj *lenobj = createObject(REDIS_STRING,NULL);
di = dictGetIterator(c->db->dict);
addReply(c,lenobj);
decrRefCount(lenobj);
while((de = dictNext(di)) != NULL) {
sds key = dictGetEntryKey(de);
robj *keyobj;
if ((pattern[0] == '*' && pattern[1] == '\0') ||
stringmatchlen(pattern,plen,key,sdslen(key),0)) {
keyobj = createStringObject(key,sdslen(key));
if (expireIfNeeded(c->db,keyobj) == 0) {
addReplyBulk(c,keyobj);
numkeys++;
}
decrRefCount(keyobj);
}
}
dictReleaseIterator(di);
lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",numkeys);
}
void dbsizeCommand(redisClient *c) {
addReplySds(c,
sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
}
void lastsaveCommand(redisClient *c) {
addReplySds(c,
sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
}
void typeCommand(redisClient *c) {
robj *o;
char *type;
o = lookupKeyRead(c->db,c->argv[1]);
if (o == NULL) {
type = "+none";
} else {
switch(o->type) {
case REDIS_STRING: type = "+string"; break;
case REDIS_LIST: type = "+list"; break;
case REDIS_SET: type = "+set"; break;
case REDIS_ZSET: type = "+zset"; break;
case REDIS_HASH: type = "+hash"; break;
default: type = "+unknown"; break;
}
}
addReplySds(c,sdsnew(type));
addReply(c,shared.crlf);
}
void saveCommand(redisClient *c) {
if (server.bgsavechildpid != -1) {
addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
return;
}
if (rdbSave(server.dbfilename) == REDIS_OK) {
addReply(c,shared.ok);
} else {
addReply(c,shared.err);
}
}
void bgsaveCommand(redisClient *c) {
if (server.bgsavechildpid != -1) {
addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
return;
}
if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
char *status = "+Background saving started\r\n";
addReplySds(c,sdsnew(status));
} else {
addReply(c,shared.err);
}
}
void shutdownCommand(redisClient *c) {
if (prepareForShutdown() == REDIS_OK)
exit(0);
addReplySds(c, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
}
void renameGenericCommand(redisClient *c, int nx) {
robj *o;
/* To use the same key as src and dst is probably an error */
if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
addReply(c,shared.sameobjecterr);
return;
}
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
return;
incrRefCount(o);
deleteIfVolatile(c->db,c->argv[2]);
if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
if (nx) {
decrRefCount(o);
addReply(c,shared.czero);
return;
}
dbReplace(c->db,c->argv[2],o);
}
dbDelete(c->db,c->argv[1]);
touchWatchedKey(c->db,c->argv[2]);
server.dirty++;
addReply(c,nx ? shared.cone : shared.ok);
}
void renameCommand(redisClient *c) {
renameGenericCommand(c,0);
}
void renamenxCommand(redisClient *c) {
renameGenericCommand(c,1);
}
void moveCommand(redisClient *c) {
robj *o;
redisDb *src, *dst;
int srcid;
/* Obtain source and target DB pointers */
src = c->db;
srcid = c->db->id;
if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
addReply(c,shared.outofrangeerr);
return;
}
dst = c->db;
selectDb(c,srcid); /* Back to the source DB */
/* If the user is moving using as target the same
* DB as the source DB it is probably an error. */
if (src == dst) {
addReply(c,shared.sameobjecterr);
return;
}
/* Check if the element exists and get a reference */
o = lookupKeyWrite(c->db,c->argv[1]);
if (!o) {
addReply(c,shared.czero);
return;
}
/* Try to add the element to the target DB */
deleteIfVolatile(dst,c->argv[1]);
if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
addReply(c,shared.czero);
return;
}
incrRefCount(o);
/* OK! key moved, free the entry in the source DB */
dbDelete(src,c->argv[1]);
server.dirty++;
addReply(c,shared.cone);
}
/*-----------------------------------------------------------------------------
* Expires API
*----------------------------------------------------------------------------*/
int removeExpire(redisDb *db, robj *key) {
/* An expire may only be removed if there is a corresponding entry in the
* main dict. Otherwise, the key will never be freed. */
redisAssert(dictFind(db->dict,key->ptr) != NULL);
if (dictDelete(db->expires,key->ptr) == DICT_OK) {
return 1;
} else {
return 0;
}
}
int setExpire(redisDb *db, robj *key, time_t when) {
dictEntry *de;
/* Reuse the sds from the main dict in the expire dict */
redisAssert((de = dictFind(db->dict,key->ptr)) != NULL);
if (dictAdd(db->expires,dictGetEntryKey(de),(void*)when) == DICT_ERR) {
return 0;
} else {
return 1;
}
}
/* Return the expire time of the specified key, or -1 if no expire
* is associated with this key (i.e. the key is non volatile) */
time_t getExpire(redisDb *db, robj *key) {
dictEntry *de;
/* No expire? return ASAP */
if (dictSize(db->expires) == 0 ||
(de = dictFind(db->expires,key->ptr)) == NULL) return -1;
/* The entry was found in the expire dict, this means it should also
* be present in the main dict (safety check). */
redisAssert(dictFind(db->dict,key->ptr) != NULL);
return (time_t) dictGetEntryVal(de);
}
int expireIfNeeded(redisDb *db, robj *key) {
time_t when = getExpire(db,key);
if (when < 0) return 0;
/* Return when this key has not expired */
if (time(NULL) <= when) return 0;
/* Delete the key */
server.stat_expiredkeys++;
server.dirty++;
return dbDelete(db,key);
}
int deleteIfVolatile(redisDb *db, robj *key) {
if (getExpire(db,key) < 0) return 0;
/* Delete the key */
server.stat_expiredkeys++;
server.dirty++;
return dbDelete(db,key);
}
/*-----------------------------------------------------------------------------
* Expires Commands
*----------------------------------------------------------------------------*/
void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
dictEntry *de;
time_t seconds;
if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
seconds -= offset;
de = dictFind(c->db->dict,key->ptr);
if (de == NULL) {
addReply(c,shared.czero);
return;
}
if (seconds <= 0) {
if (dbDelete(c->db,key)) server.dirty++;
addReply(c, shared.cone);
return;
} else {
time_t when = time(NULL)+seconds;
if (setExpire(c->db,key,when)) {
addReply(c,shared.cone);
server.dirty++;
} else {
addReply(c,shared.czero);
}
return;
}
}
void expireCommand(redisClient *c) {
expireGenericCommand(c,c->argv[1],c->argv[2],0);
}
void expireatCommand(redisClient *c) {
expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
}
void ttlCommand(redisClient *c) {
time_t expire;
int ttl = -1;
expire = getExpire(c->db,c->argv[1]);
if (expire != -1) {
ttl = (int) (expire-time(NULL));
if (ttl < 0) ttl = -1;
}
addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
}
#include "redis.h"
#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
/* ================================= Debugging ============================== */
/* Compute the sha1 of string at 's' with 'len' bytes long.
* The SHA1 is then xored againt the string pointed by digest.
* Since xor is commutative, this operation is used in order to
* "add" digests relative to unordered elements.
*
* So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
void xorDigest(unsigned char *digest, void *ptr, size_t len) {
SHA1_CTX ctx;
unsigned char hash[20], *s = ptr;
int j;
SHA1Init(&ctx);
SHA1Update(&ctx,s,len);
SHA1Final(hash,&ctx);
for (j = 0; j < 20; j++)
digest[j] ^= hash[j];
}
void xorObjectDigest(unsigned char *digest, robj *o) {
o = getDecodedObject(o);
xorDigest(digest,o->ptr,sdslen(o->ptr));
decrRefCount(o);
}
/* This function instead of just computing the SHA1 and xoring it
* against diget, also perform the digest of "digest" itself and
* replace the old value with the new one.
*
* So the final digest will be:
*
* digest = SHA1(digest xor SHA1(data))
*
* This function is used every time we want to preserve the order so
* that digest(a,b,c,d) will be different than digest(b,c,d,a)
*
* Also note that mixdigest("foo") followed by mixdigest("bar")
* will lead to a different digest compared to "fo", "obar".
*/
void mixDigest(unsigned char *digest, void *ptr, size_t len) {
SHA1_CTX ctx;
char *s = ptr;
xorDigest(digest,s,len);
SHA1Init(&ctx);
SHA1Update(&ctx,digest,20);
SHA1Final(digest,&ctx);
}
void mixObjectDigest(unsigned char *digest, robj *o) {
o = getDecodedObject(o);
mixDigest(digest,o->ptr,sdslen(o->ptr));
decrRefCount(o);
}
/* Compute the dataset digest. Since keys, sets elements, hashes elements
* are not ordered, we use a trick: every aggregate digest is the xor
* of the digests of their elements. This way the order will not change
* the result. For list instead we use a feedback entering the output digest
* as input in order to ensure that a different ordered list will result in
* a different digest. */
void computeDatasetDigest(unsigned char *final) {
unsigned char digest[20];
char buf[128];
dictIterator *di = NULL;
dictEntry *de;
int j;
uint32_t aux;
memset(final,0,20); /* Start with a clean result */
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
if (dictSize(db->dict) == 0) continue;
di = dictGetIterator(db->dict);
/* hash the DB id, so the same dataset moved in a different
* DB will lead to a different digest */
aux = htonl(j);
mixDigest(final,&aux,sizeof(aux));
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
sds key;
robj *keyobj, *o;
time_t expiretime;
memset(digest,0,20); /* This key-val digest */
key = dictGetEntryKey(de);
keyobj = createStringObject(key,sdslen(key));
mixDigest(digest,key,sdslen(key));
/* Make sure the key is loaded if VM is active */
o = lookupKeyRead(db,keyobj);
aux = htonl(o->type);
mixDigest(digest,&aux,sizeof(aux));
expiretime = getExpire(db,keyobj);
/* Save the key and associated value */
if (o->type == REDIS_STRING) {
mixObjectDigest(digest,o);
} else if (o->type == REDIS_LIST) {
listTypeIterator *li = listTypeInitIterator(o,0,REDIS_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
robj *eleobj = listTypeGet(&entry);
mixObjectDigest(digest,eleobj);
decrRefCount(eleobj);
}
listTypeReleaseIterator(li);
} else if (o->type == REDIS_SET) {
dict *set = o->ptr;
dictIterator *di = dictGetIterator(set);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
robj *eleobj = dictGetEntryKey(de);
xorObjectDigest(digest,eleobj);
}
dictReleaseIterator(di);
} else if (o->type == REDIS_ZSET) {
zset *zs = o->ptr;
dictIterator *di = dictGetIterator(zs->dict);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
robj *eleobj = dictGetEntryKey(de);
double *score = dictGetEntryVal(de);
unsigned char eledigest[20];
snprintf(buf,sizeof(buf),"%.17g",*score);
memset(eledigest,0,20);
mixObjectDigest(eledigest,eleobj);
mixDigest(eledigest,buf,strlen(buf));
xorDigest(digest,eledigest,20);
}
dictReleaseIterator(di);
} else if (o->type == REDIS_HASH) {
hashTypeIterator *hi;
robj *obj;
hi = hashTypeInitIterator(o);
while (hashTypeNext(hi) != REDIS_ERR) {
unsigned char eledigest[20];
memset(eledigest,0,20);
obj = hashTypeCurrent(hi,REDIS_HASH_KEY);
mixObjectDigest(eledigest,obj);
decrRefCount(obj);
obj = hashTypeCurrent(hi,REDIS_HASH_VALUE);
mixObjectDigest(eledigest,obj);
decrRefCount(obj);
xorDigest(digest,eledigest,20);
}
hashTypeReleaseIterator(hi);
} else {
redisPanic("Unknown object type");
}
/* If the key has an expire, add it to the mix */
if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
/* We can finally xor the key-val digest to the final digest */
xorDigest(final,digest,20);
decrRefCount(keyobj);
}
dictReleaseIterator(di);
}
}
void debugCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
*((char*)-1) = 'x';
} else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
if (rdbSave(server.dbfilename) != REDIS_OK) {
addReply(c,shared.err);
return;
}
emptyDb();
if (rdbLoad(server.dbfilename) != REDIS_OK) {
addReply(c,shared.err);
return;
}
redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
emptyDb();
if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
addReply(c,shared.err);
return;
}
redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
robj *val;
if (!de) {
addReply(c,shared.nokeyerr);
return;
}
val = dictGetEntryVal(de);
if (!server.vm_enabled || (val->storage == REDIS_VM_MEMORY ||
val->storage == REDIS_VM_SWAPPING)) {
char *strenc;
strenc = strEncoding(val->encoding);
addReplySds(c,sdscatprintf(sdsempty(),
"+Value at:%p refcount:%d "
"encoding:%s serializedlength:%lld\r\n",
(void*)val, val->refcount,
strenc, (long long) rdbSavedObjectLen(val,NULL)));
} else {
vmpointer *vp = (vmpointer*) val;
addReplySds(c,sdscatprintf(sdsempty(),
"+Value swapped at: page %llu "
"using %llu pages\r\n",
(unsigned long long) vp->page,
(unsigned long long) vp->usedpages));
}
} else if (!strcasecmp(c->argv[1]->ptr,"swapin") && c->argc == 3) {
lookupKeyRead(c->db,c->argv[2]);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
robj *val;
vmpointer *vp;
if (!server.vm_enabled) {
addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
return;
}
if (!de) {
addReply(c,shared.nokeyerr);
return;
}
val = dictGetEntryVal(de);
/* Swap it */
if (val->storage != REDIS_VM_MEMORY) {
addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
} else if (val->refcount != 1) {
addReplySds(c,sdsnew("-ERR Object is shared\r\n"));
} else if ((vp = vmSwapObjectBlocking(val)) != NULL) {
dictGetEntryVal(de) = vp;
addReply(c,shared.ok);
} else {
addReply(c,shared.err);
}
} else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) {
long keys, j;
robj *key, *val;
char buf[128];
if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
return;
for (j = 0; j < keys; j++) {
snprintf(buf,sizeof(buf),"key:%lu",j);
key = createStringObject(buf,strlen(buf));
if (lookupKeyRead(c->db,key) != NULL) {
decrRefCount(key);
continue;
}
snprintf(buf,sizeof(buf),"value:%lu",j);
val = createStringObject(buf,strlen(buf));
dbAdd(c->db,key,val);
decrRefCount(key);
}
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) {
unsigned char digest[20];
sds d = sdsnew("+");
int j;
computeDatasetDigest(digest);
for (j = 0; j < 20; j++)
d = sdscatprintf(d, "%02x",digest[j]);
d = sdscatlen(d,"\r\n",2);
addReplySds(c,d);
} else {
addReplySds(c,sdsnew(
"-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
}
}
void _redisAssert(char *estr, char *file, int line) {
redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
#ifdef HAVE_BACKTRACE
redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
*((char*)-1) = 'x';
#endif
}
void _redisPanic(char *msg, char *file, int line) {
redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
#ifdef HAVE_BACKTRACE
redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
*((char*)-1) = 'x';
#endif
}
#include "redis.h"
/* ================================ MULTI/EXEC ============================== */
/* Client state initialization for MULTI/EXEC */
void initClientMultiState(redisClient *c) {
c->mstate.commands = NULL;
c->mstate.count = 0;
}
/* Release all the resources associated with MULTI/EXEC state */
void freeClientMultiState(redisClient *c) {
int j;
for (j = 0; j < c->mstate.count; j++) {
int i;
multiCmd *mc = c->mstate.commands+j;
for (i = 0; i < mc->argc; i++)
decrRefCount(mc->argv[i]);
zfree(mc->argv);
}
zfree(c->mstate.commands);
}
/* Add a new command into the MULTI commands queue */
void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
multiCmd *mc;
int j;
c->mstate.commands = zrealloc(c->mstate.commands,
sizeof(multiCmd)*(c->mstate.count+1));
mc = c->mstate.commands+c->mstate.count;
mc->cmd = cmd;
mc->argc = c->argc;
mc->argv = zmalloc(sizeof(robj*)*c->argc);
memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
for (j = 0; j < c->argc; j++)
incrRefCount(mc->argv[j]);
c->mstate.count++;
}
void multiCommand(redisClient *c) {
if (c->flags & REDIS_MULTI) {
addReplySds(c,sdsnew("-ERR MULTI calls can not be nested\r\n"));
return;
}
c->flags |= REDIS_MULTI;
addReply(c,shared.ok);
}
void discardCommand(redisClient *c) {
if (!(c->flags & REDIS_MULTI)) {
addReplySds(c,sdsnew("-ERR DISCARD without MULTI\r\n"));
return;
}
freeClientMultiState(c);
initClientMultiState(c);
c->flags &= (~REDIS_MULTI);
unwatchAllKeys(c);
addReply(c,shared.ok);
}
/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
* implememntation for more information. */
void execCommandReplicateMulti(redisClient *c) {
struct redisCommand *cmd;
robj *multistring = createStringObject("MULTI",5);
cmd = lookupCommand("multi");
if (server.appendonly)
feedAppendOnlyFile(cmd,c->db->id,&multistring,1);
if (listLength(server.slaves))
replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
decrRefCount(multistring);
}
void execCommand(redisClient *c) {
int j;
robj **orig_argv;
int orig_argc;
if (!(c->flags & REDIS_MULTI)) {
addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
return;
}
/* Check if we need to abort the EXEC if some WATCHed key was touched.
* A failed EXEC will return a multi bulk nil object. */
if (c->flags & REDIS_DIRTY_CAS) {
freeClientMultiState(c);
initClientMultiState(c);
c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
unwatchAllKeys(c);
addReply(c,shared.nullmultibulk);
return;
}
/* Replicate a MULTI request now that we are sure the block is executed.
* This way we'll deliver the MULTI/..../EXEC block as a whole and
* both the AOF and the replication link will have the same consistency
* and atomicity guarantees. */
execCommandReplicateMulti(c);
/* Exec all the queued commands */
unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
orig_argv = c->argv;
orig_argc = c->argc;
addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
for (j = 0; j < c->mstate.count; j++) {
c->argc = c->mstate.commands[j].argc;
c->argv = c->mstate.commands[j].argv;
call(c,c->mstate.commands[j].cmd);
}
c->argv = orig_argv;
c->argc = orig_argc;
freeClientMultiState(c);
initClientMultiState(c);
c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
/* Make sure the EXEC command is always replicated / AOF, since we
* always send the MULTI command (we can't know beforehand if the
* next operations will contain at least a modification to the DB). */
server.dirty++;
}
/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
*
* The implementation uses a per-DB hash table mapping keys to list of clients
* WATCHing those keys, so that given a key that is going to be modified
* we can mark all the associated clients as dirty.
*
* Also every client contains a list of WATCHed keys so that's possible to
* un-watch such keys when the client is freed or when UNWATCH is called. */
/* In the client->watched_keys list we need to use watchedKey structures
* as in order to identify a key in Redis we need both the key name and the
* DB */
typedef struct watchedKey {
robj *key;
redisDb *db;
} watchedKey;
/* Watch for the specified key */
void watchForKey(redisClient *c, robj *key) {
list *clients = NULL;
listIter li;
listNode *ln;
watchedKey *wk;
/* Check if we are already watching for this key */
listRewind(c->watched_keys,&li);
while((ln = listNext(&li))) {
wk = listNodeValue(ln);
if (wk->db == c->db && equalStringObjects(key,wk->key))
return; /* Key already watched */
}
/* This key is not already watched in this DB. Let's add it */
clients = dictFetchValue(c->db->watched_keys,key);
if (!clients) {
clients = listCreate();
dictAdd(c->db->watched_keys,key,clients);
incrRefCount(key);
}
listAddNodeTail(clients,c);
/* Add the new key to the lits of keys watched by this client */
wk = zmalloc(sizeof(*wk));
wk->key = key;
wk->db = c->db;
incrRefCount(key);
listAddNodeTail(c->watched_keys,wk);
}
/* Unwatch all the keys watched by this client. To clean the EXEC dirty
* flag is up to the caller. */
void unwatchAllKeys(redisClient *c) {
listIter li;
listNode *ln;
if (listLength(c->watched_keys) == 0) return;
listRewind(c->watched_keys,&li);
while((ln = listNext(&li))) {
list *clients;
watchedKey *wk;
/* Lookup the watched key -> clients list and remove the client
* from the list */
wk = listNodeValue(ln);
clients = dictFetchValue(wk->db->watched_keys, wk->key);
redisAssert(clients != NULL);
listDelNode(clients,listSearchKey(clients,c));
/* Kill the entry at all if this was the only client */
if (listLength(clients) == 0)
dictDelete(wk->db->watched_keys, wk->key);
/* Remove this watched key from the client->watched list */
listDelNode(c->watched_keys,ln);
decrRefCount(wk->key);
zfree(wk);
}
}
/* "Touch" a key, so that if this key is being WATCHed by some client the
* next EXEC will fail. */
void touchWatchedKey(redisDb *db, robj *key) {
list *clients;
listIter li;
listNode *ln;
if (dictSize(db->watched_keys) == 0) return;
clients = dictFetchValue(db->watched_keys, key);
if (!clients) return;
/* Mark all the clients watching this key as REDIS_DIRTY_CAS */
/* Check if we are already watching for this key */
listRewind(clients,&li);
while((ln = listNext(&li))) {
redisClient *c = listNodeValue(ln);
c->flags |= REDIS_DIRTY_CAS;
}
}
/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
* flush but will be deleted as effect of the flushing operation should
* be touched. "dbid" is the DB that's getting the flush. -1 if it is
* a FLUSHALL operation (all the DBs flushed). */
void touchWatchedKeysOnFlush(int dbid) {
listIter li1, li2;
listNode *ln;
/* For every client, check all the waited keys */
listRewind(server.clients,&li1);
while((ln = listNext(&li1))) {
redisClient *c = listNodeValue(ln);
listRewind(c->watched_keys,&li2);
while((ln = listNext(&li2))) {
watchedKey *wk = listNodeValue(ln);
/* For every watched key matching the specified DB, if the
* key exists, mark the client as dirty, as the key will be
* removed. */
if (dbid == -1 || wk->db->id == dbid) {
if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
c->flags |= REDIS_DIRTY_CAS;
}
}
}
}
void watchCommand(redisClient *c) {
int j;
if (c->flags & REDIS_MULTI) {
addReplySds(c,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
return;
}
for (j = 1; j < c->argc; j++)
watchForKey(c,c->argv[j]);
addReply(c,shared.ok);
}
void unwatchCommand(redisClient *c) {
unwatchAllKeys(c);
c->flags &= (~REDIS_DIRTY_CAS);
addReply(c,shared.ok);
}
#include "redis.h"
#include <sys/uio.h>
void *dupClientReplyValue(void *o) {
incrRefCount((robj*)o);
return o;
}
int listMatchObjects(void *a, void *b) {
return equalStringObjects(a,b);
}
redisClient *createClient(int fd) {
redisClient *c = zmalloc(sizeof(*c));
anetNonBlock(NULL,fd);
anetTcpNoDelay(NULL,fd);
if (!c) return NULL;
selectDb(c,0);
c->fd = fd;
c->querybuf = sdsempty();
c->argc = 0;
c->argv = NULL;
c->bulklen = -1;
c->multibulk = 0;
c->mbargc = 0;
c->mbargv = NULL;
c->sentlen = 0;
c->flags = 0;
c->lastinteraction = time(NULL);
c->authenticated = 0;
c->replstate = REDIS_REPL_NONE;
c->reply = listCreate();
listSetFreeMethod(c->reply,decrRefCount);
listSetDupMethod(c->reply,dupClientReplyValue);
c->blocking_keys = NULL;
c->blocking_keys_num = 0;
c->io_keys = listCreate();
c->watched_keys = listCreate();
listSetFreeMethod(c->io_keys,decrRefCount);
c->pubsub_channels = dictCreate(&setDictType,NULL);
c->pubsub_patterns = listCreate();
listSetFreeMethod(c->pubsub_patterns,decrRefCount);
listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
readQueryFromClient, c) == AE_ERR) {
freeClient(c);
return NULL;
}
listAddNodeTail(server.clients,c);
initClientMultiState(c);
return c;
}
void addReply(redisClient *c, robj *obj) {
if (listLength(c->reply) == 0 &&
(c->replstate == REDIS_REPL_NONE ||
c->replstate == REDIS_REPL_ONLINE) &&
aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
sendReplyToClient, c) == AE_ERR) return;
if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
obj = dupStringObject(obj);
obj->refcount = 0; /* getDecodedObject() will increment the refcount */
}
listAddNodeTail(c->reply,getDecodedObject(obj));
}
void addReplySds(redisClient *c, sds s) {
robj *o = createObject(REDIS_STRING,s);
addReply(c,o);
decrRefCount(o);
}
void addReplyDouble(redisClient *c, double d) {
char buf[128];
snprintf(buf,sizeof(buf),"%.17g",d);
addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
(unsigned long) strlen(buf),buf));
}
void addReplyLongLong(redisClient *c, long long ll) {
char buf[128];
size_t len;
if (ll == 0) {
addReply(c,shared.czero);
return;
} else if (ll == 1) {
addReply(c,shared.cone);
return;
}
buf[0] = ':';
len = ll2string(buf+1,sizeof(buf)-1,ll);
buf[len+1] = '\r';
buf[len+2] = '\n';
addReplySds(c,sdsnewlen(buf,len+3));
}
void addReplyUlong(redisClient *c, unsigned long ul) {
char buf[128];
size_t len;
if (ul == 0) {
addReply(c,shared.czero);
return;
} else if (ul == 1) {
addReply(c,shared.cone);
return;
}
len = snprintf(buf,sizeof(buf),":%lu\r\n",ul);
addReplySds(c,sdsnewlen(buf,len));
}
void addReplyBulkLen(redisClient *c, robj *obj) {
size_t len, intlen;
char buf[128];
if (obj->encoding == REDIS_ENCODING_RAW) {
len = sdslen(obj->ptr);
} else {
long n = (long)obj->ptr;
/* Compute how many bytes will take this integer as a radix 10 string */
len = 1;
if (n < 0) {
len++;
n = -n;
}
while((n = n/10) != 0) {
len++;
}
}
buf[0] = '$';
intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len);
buf[intlen+1] = '\r';
buf[intlen+2] = '\n';
addReplySds(c,sdsnewlen(buf,intlen+3));
}
void addReplyBulk(redisClient *c, robj *obj) {
addReplyBulkLen(c,obj);
addReply(c,obj);
addReply(c,shared.crlf);
}
/* In the CONFIG command we need to add vanilla C string as bulk replies */
void addReplyBulkCString(redisClient *c, char *s) {
if (s == NULL) {
addReply(c,shared.nullbulk);
} else {
robj *o = createStringObject(s,strlen(s));
addReplyBulk(c,o);
decrRefCount(o);
}
}
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd;
char cip[128];
redisClient *c;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
REDIS_NOTUSED(privdata);
cfd = anetAccept(server.neterr, fd, cip, &cport);
if (cfd == AE_ERR) {
redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
return;
}
redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
if ((c = createClient(cfd)) == NULL) {
redisLog(REDIS_WARNING,"Error allocating resoures for the client");
close(cfd); /* May be already closed, just ingore errors */
return;
}
/* If maxclient directive is set and this is one client more... close the
* connection. Note that we create the client instead to check before
* for this condition, since now the socket is already set in nonblocking
* mode and we can send an error for free using the Kernel I/O */
if (server.maxclients && listLength(server.clients) > server.maxclients) {
char *err = "-ERR max number of clients reached\r\n";
/* That's a best effort error message, don't check write errors */
if (write(c->fd,err,strlen(err)) == -1) {
/* Nothing to do, Just to avoid the warning... */
}
freeClient(c);
return;
}
server.stat_numconnections++;
}
static void freeClientArgv(redisClient *c) {
int j;
for (j = 0; j < c->argc; j++)
decrRefCount(c->argv[j]);
for (j = 0; j < c->mbargc; j++)
decrRefCount(c->mbargv[j]);
c->argc = 0;
c->mbargc = 0;
}
void freeClient(redisClient *c) {
listNode *ln;
/* Note that if the client we are freeing is blocked into a blocking
* call, we have to set querybuf to NULL *before* to call
* unblockClientWaitingData() to avoid processInputBuffer() will get
* called. Also it is important to remove the file events after
* this, because this call adds the READABLE event. */
sdsfree(c->querybuf);
c->querybuf = NULL;
if (c->flags & REDIS_BLOCKED)
unblockClientWaitingData(c);
/* UNWATCH all the keys */
unwatchAllKeys(c);
listRelease(c->watched_keys);
/* Unsubscribe from all the pubsub channels */
pubsubUnsubscribeAllChannels(c,0);
pubsubUnsubscribeAllPatterns(c,0);
dictRelease(c->pubsub_channels);
listRelease(c->pubsub_patterns);
/* Obvious cleanup */
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
listRelease(c->reply);
freeClientArgv(c);
close(c->fd);
/* Remove from the list of clients */
ln = listSearchKey(server.clients,c);
redisAssert(ln != NULL);
listDelNode(server.clients,ln);
/* Remove from the list of clients that are now ready to be restarted
* after waiting for swapped keys */
if (c->flags & REDIS_IO_WAIT && listLength(c->io_keys) == 0) {
ln = listSearchKey(server.io_ready_clients,c);
if (ln) {
listDelNode(server.io_ready_clients,ln);
server.vm_blocked_clients--;
}
}
/* Remove from the list of clients waiting for swapped keys */
while (server.vm_enabled && listLength(c->io_keys)) {
ln = listFirst(c->io_keys);
dontWaitForSwappedKey(c,ln->value);
}
listRelease(c->io_keys);
/* Master/slave cleanup */
if (c->flags & REDIS_SLAVE) {
if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
close(c->repldbfd);
list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
ln = listSearchKey(l,c);
redisAssert(ln != NULL);
listDelNode(l,ln);
}
if (c->flags & REDIS_MASTER) {
server.master = NULL;
server.replstate = REDIS_REPL_CONNECT;
}
/* Release memory */
zfree(c->argv);
zfree(c->mbargv);
freeClientMultiState(c);
zfree(c);
}
#define GLUEREPLY_UP_TO (1024)
static void glueReplyBuffersIfNeeded(redisClient *c) {
int copylen = 0;
char buf[GLUEREPLY_UP_TO];
listNode *ln;
listIter li;
robj *o;
listRewind(c->reply,&li);
while((ln = listNext(&li))) {
int objlen;
o = ln->value;
objlen = sdslen(o->ptr);
if (copylen + objlen <= GLUEREPLY_UP_TO) {
memcpy(buf+copylen,o->ptr,objlen);
copylen += objlen;
listDelNode(c->reply,ln);
} else {
if (copylen == 0) return;
break;
}
}
/* Now the output buffer is empty, add the new single element */
o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
listAddNodeHead(c->reply,o);
}
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *c = privdata;
int nwritten = 0, totwritten = 0, objlen;
robj *o;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
/* Use writev() if we have enough buffers to send */
if (!server.glueoutputbuf &&
listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
!(c->flags & REDIS_MASTER))
{
sendReplyToClientWritev(el, fd, privdata, mask);
return;
}
while(listLength(c->reply)) {
if (server.glueoutputbuf && listLength(c->reply) > 1)
glueReplyBuffersIfNeeded(c);
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
if (objlen == 0) {
listDelNode(c->reply,listFirst(c->reply));
continue;
}
if (c->flags & REDIS_MASTER) {
/* Don't reply to a master */
nwritten = objlen - c->sentlen;
} else {
nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
}
c->sentlen += nwritten;
totwritten += nwritten;
/* If we fully sent the object on head go to the next one */
if (c->sentlen == objlen) {
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
}
/* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
* bytes, in a single threaded server it's a good idea to serve
* other clients as well, even if a very large request comes from
* super fast link that is always able to accept data (in real world
* scenario think about 'KEYS *' against the loopback interfae) */
if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
}
if (nwritten == -1) {
if (errno == EAGAIN) {
nwritten = 0;
} else {
redisLog(REDIS_VERBOSE,
"Error writing to client: %s", strerror(errno));
freeClient(c);
return;
}
}
if (totwritten > 0) c->lastinteraction = time(NULL);
if (listLength(c->reply) == 0) {
c->sentlen = 0;
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
}
}
void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
{
redisClient *c = privdata;
int nwritten = 0, totwritten = 0, objlen, willwrite;
robj *o;
struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
int offset, ion = 0;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
listNode *node;
while (listLength(c->reply)) {
offset = c->sentlen;
ion = 0;
willwrite = 0;
/* fill-in the iov[] array */
for(node = listFirst(c->reply); node; node = listNextNode(node)) {
o = listNodeValue(node);
objlen = sdslen(o->ptr);
if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
break;
if(ion == REDIS_WRITEV_IOVEC_COUNT)
break; /* no more iovecs */
iov[ion].iov_base = ((char*)o->ptr) + offset;
iov[ion].iov_len = objlen - offset;
willwrite += objlen - offset;
offset = 0; /* just for the first item */
ion++;
}
if(willwrite == 0)
break;
/* write all collected blocks at once */
if((nwritten = writev(fd, iov, ion)) < 0) {
if (errno != EAGAIN) {
redisLog(REDIS_VERBOSE,
"Error writing to client: %s", strerror(errno));
freeClient(c);
return;
}
break;
}
totwritten += nwritten;
offset = c->sentlen;
/* remove written robjs from c->reply */
while (nwritten && listLength(c->reply)) {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
if(nwritten >= objlen - offset) {
listDelNode(c->reply, listFirst(c->reply));
nwritten -= objlen - offset;
c->sentlen = 0;
} else {
/* partial write */
c->sentlen += nwritten;
break;
}
offset = 0;
}
}
if (totwritten > 0)
c->lastinteraction = time(NULL);
if (listLength(c->reply) == 0) {
c->sentlen = 0;
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
}
}
/* resetClient prepare the client to process the next command */
void resetClient(redisClient *c) {
freeClientArgv(c);
c->bulklen = -1;
c->multibulk = 0;
}
void closeTimedoutClients(void) {
redisClient *c;
listNode *ln;
time_t now = time(NULL);
listIter li;
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
c = listNodeValue(ln);
if (server.maxidletime &&
!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
!(c->flags & REDIS_MASTER) && /* no timeout for masters */
dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
listLength(c->pubsub_patterns) == 0 &&
(now - c->lastinteraction > server.maxidletime))
{
redisLog(REDIS_VERBOSE,"Closing idle client");
freeClient(c);
} else if (c->flags & REDIS_BLOCKED) {
if (c->blockingto != 0 && c->blockingto < now) {
addReply(c,shared.nullmultibulk);
unblockClientWaitingData(c);
}
}
}
}
void processInputBuffer(redisClient *c) {
again:
/* Before to process the input buffer, make sure the client is not
* waitig for a blocking operation such as BLPOP. Note that the first
* iteration the client is never blocked, otherwise the processInputBuffer
* would not be called at all, but after the execution of the first commands
* in the input buffer the client may be blocked, and the "goto again"
* will try to reiterate. The following line will make it return asap. */
if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
if (c->bulklen == -1) {
/* Read the first line of the query */
char *p = strchr(c->querybuf,'\n');
size_t querylen;
if (p) {
sds query, *argv;
int argc, j;
query = c->querybuf;
c->querybuf = sdsempty();
querylen = 1+(p-(query));
if (sdslen(query) > querylen) {
/* leave data after the first line of the query in the buffer */
c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
}
*p = '\0'; /* remove "\n" */
if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
sdsupdatelen(query);
/* Now we can split the query in arguments */
argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
sdsfree(query);
if (c->argv) zfree(c->argv);
c->argv = zmalloc(sizeof(robj*)*argc);
for (j = 0; j < argc; j++) {
if (sdslen(argv[j])) {
c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
c->argc++;
} else {
sdsfree(argv[j]);
}
}
zfree(argv);
if (c->argc) {
/* Execute the command. If the client is still valid
* after processCommand() return and there is something
* on the query buffer try to process the next command. */
if (processCommand(c) && sdslen(c->querybuf)) goto again;
} else {
/* Nothing to process, argc == 0. Just process the query
* buffer if it's not empty or return to the caller */
if (sdslen(c->querybuf)) goto again;
}
return;
} else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
redisLog(REDIS_VERBOSE, "Client protocol error");
freeClient(c);
return;
}
} else {
/* Bulk read handling. Note that if we are at this point
the client already sent a command terminated with a newline,
we are reading the bulk data that is actually the last
argument of the command. */
int qbl = sdslen(c->querybuf);
if (c->bulklen <= qbl) {
/* Copy everything but the final CRLF as final argument */
c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
c->argc++;
c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
/* Process the command. If the client is still valid after
* the processing and there is more data in the buffer
* try to parse it. */
if (processCommand(c) && sdslen(c->querybuf)) goto again;
return;
}
}
}
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *c = (redisClient*) privdata;
char buf[REDIS_IOBUF_LEN];
int nread;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
nread = read(fd, buf, REDIS_IOBUF_LEN);
if (nread == -1) {
if (errno == EAGAIN) {
nread = 0;
} else {
redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
freeClient(c);
return;
}
} else if (nread == 0) {
redisLog(REDIS_VERBOSE, "Client closed connection");
freeClient(c);
return;
}
if (nread) {
c->querybuf = sdscatlen(c->querybuf, buf, nread);
c->lastinteraction = time(NULL);
} else {
return;
}
processInputBuffer(c);
}
#include "redis.h"
#include <pthread.h>
robj *createObject(int type, void *ptr) {
robj *o;
if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
if (listLength(server.objfreelist)) {
listNode *head = listFirst(server.objfreelist);
o = listNodeValue(head);
listDelNode(server.objfreelist,head);
if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
} else {
if (server.vm_enabled)
pthread_mutex_unlock(&server.obj_freelist_mutex);
o = zmalloc(sizeof(*o));
}
o->type = type;
o->encoding = REDIS_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
if (server.vm_enabled) {
/* Note that this code may run in the context of an I/O thread
* and accessing server.lruclock in theory is an error
* (no locks). But in practice this is safe, and even if we read
* garbage Redis will not fail. */
o->lru = server.lruclock;
o->storage = REDIS_VM_MEMORY;
}
return o;
}
robj *createStringObject(char *ptr, size_t len) {
return createObject(REDIS_STRING,sdsnewlen(ptr,len));
}
robj *createStringObjectFromLongLong(long long value) {
robj *o;
if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
if (value >= LONG_MIN && value <= LONG_MAX) {
o = createObject(REDIS_STRING, NULL);
o->encoding = REDIS_ENCODING_INT;
o->ptr = (void*)((long)value);
} else {
o = createObject(REDIS_STRING,sdsfromlonglong(value));
}
}
return o;
}
robj *dupStringObject(robj *o) {
redisAssert(o->encoding == REDIS_ENCODING_RAW);
return createStringObject(o->ptr,sdslen(o->ptr));
}
robj *createListObject(void) {
list *l = listCreate();
robj *o = createObject(REDIS_LIST,l);
listSetFreeMethod(l,decrRefCount);
o->encoding = REDIS_ENCODING_LINKEDLIST;
return o;
}
robj *createZiplistObject(void) {
unsigned char *zl = ziplistNew();
robj *o = createObject(REDIS_LIST,zl);
o->encoding = REDIS_ENCODING_ZIPLIST;
return o;
}
robj *createSetObject(void) {
dict *d = dictCreate(&setDictType,NULL);
return createObject(REDIS_SET,d);
}
robj *createHashObject(void) {
/* All the Hashes start as zipmaps. Will be automatically converted
* into hash tables if there are enough elements or big elements
* inside. */
unsigned char *zm = zipmapNew();
robj *o = createObject(REDIS_HASH,zm);
o->encoding = REDIS_ENCODING_ZIPMAP;
return o;
}
robj *createZsetObject(void) {
zset *zs = zmalloc(sizeof(*zs));
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
return createObject(REDIS_ZSET,zs);
}
void freeStringObject(robj *o) {
if (o->encoding == REDIS_ENCODING_RAW) {
sdsfree(o->ptr);
}
}
void freeListObject(robj *o) {
switch (o->encoding) {
case REDIS_ENCODING_LINKEDLIST:
listRelease((list*) o->ptr);
break;
case REDIS_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
redisPanic("Unknown list encoding type");
}
}
void freeSetObject(robj *o) {
dictRelease((dict*) o->ptr);
}
void freeZsetObject(robj *o) {
zset *zs = o->ptr;
dictRelease(zs->dict);
zslFree(zs->zsl);
zfree(zs);
}
void freeHashObject(robj *o) {
switch (o->encoding) {
case REDIS_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
case REDIS_ENCODING_ZIPMAP:
zfree(o->ptr);
break;
default:
redisPanic("Unknown hash encoding type");
break;
}
}
void incrRefCount(robj *o) {
o->refcount++;
}
void decrRefCount(void *obj) {
robj *o = obj;
/* Object is a swapped out value, or in the process of being loaded. */
if (server.vm_enabled &&
(o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING))
{
vmpointer *vp = obj;
if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o);
vmMarkPagesFree(vp->page,vp->usedpages);
server.vm_stats_swapped_objects--;
zfree(vp);
return;
}
if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
/* Object is in memory, or in the process of being swapped out.
*
* If the object is being swapped out, abort the operation on
* decrRefCount even if the refcount does not drop to 0: the object
* is referenced at least two times, as value of the key AND as
* job->val in the iojob. So if we don't invalidate the iojob, when it is
* done but the relevant key was removed in the meantime, the
* complete jobs handler will not find the key about the job and the
* assert will fail. */
if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING)
vmCancelThreadedIOJob(o);
if (--(o->refcount) == 0) {
switch(o->type) {
case REDIS_STRING: freeStringObject(o); break;
case REDIS_LIST: freeListObject(o); break;
case REDIS_SET: freeSetObject(o); break;
case REDIS_ZSET: freeZsetObject(o); break;
case REDIS_HASH: freeHashObject(o); break;
default: redisPanic("Unknown object type"); break;
}
if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
!listAddNodeHead(server.objfreelist,o))
zfree(o);
if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
}
}
int checkType(redisClient *c, robj *o, int type) {
if (o->type != type) {
addReply(c,shared.wrongtypeerr);
return 1;
}
return 0;
}
/* Try to encode a string object in order to save space */
robj *tryObjectEncoding(robj *o) {
long value;
sds s = o->ptr;
if (o->encoding != REDIS_ENCODING_RAW)
return o; /* Already encoded */
/* It's not safe to encode shared objects: shared objects can be shared
* everywhere in the "object space" of Redis. Encoded objects can only
* appear as "values" (and not, for instance, as keys) */
if (o->refcount > 1) return o;
/* Currently we try to encode only strings */
redisAssert(o->type == REDIS_STRING);
/* Check if we can represent this string as a long integer */
if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return o;
/* Ok, this object can be encoded */
if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
decrRefCount(o);
incrRefCount(shared.integers[value]);
return shared.integers[value];
} else {
o->encoding = REDIS_ENCODING_INT;
sdsfree(o->ptr);
o->ptr = (void*) value;
return o;
}
}
/* Get a decoded version of an encoded object (returned as a new object).
* If the object is already raw-encoded just increment the ref count. */
robj *getDecodedObject(robj *o) {
robj *dec;
if (o->encoding == REDIS_ENCODING_RAW) {
incrRefCount(o);
return o;
}
if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
char buf[32];
ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else {
redisPanic("Unknown encoding type");
}
}
/* Compare two string objects via strcmp() or alike.
* Note that the objects may be integer-encoded. In such a case we
* use ll2string() to get a string representation of the numbers on the stack
* and compare the strings, it's much faster than calling getDecodedObject().
*
* Important note: if objects are not integer encoded, but binary-safe strings,
* sdscmp() from sds.c will apply memcmp() so this function ca be considered
* binary safe. */
int compareStringObjects(robj *a, robj *b) {
redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
char bufa[128], bufb[128], *astr, *bstr;
int bothsds = 1;
if (a == b) return 0;
if (a->encoding != REDIS_ENCODING_RAW) {
ll2string(bufa,sizeof(bufa),(long) a->ptr);
astr = bufa;
bothsds = 0;
} else {
astr = a->ptr;
}
if (b->encoding != REDIS_ENCODING_RAW) {
ll2string(bufb,sizeof(bufb),(long) b->ptr);
bstr = bufb;
bothsds = 0;
} else {
bstr = b->ptr;
}
return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
}
/* Equal string objects return 1 if the two objects are the same from the
* point of view of a string comparison, otherwise 0 is returned. Note that
* this function is faster then checking for (compareStringObject(a,b) == 0)
* because it can perform some more optimization. */
int equalStringObjects(robj *a, robj *b) {
if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){
return a->ptr == b->ptr;
} else {
return compareStringObjects(a,b) == 0;
}
}
size_t stringObjectLen(robj *o) {
redisAssert(o->type == REDIS_STRING);
if (o->encoding == REDIS_ENCODING_RAW) {
return sdslen(o->ptr);
} else {
char buf[32];
return ll2string(buf,32,(long)o->ptr);
}
}
int getDoubleFromObject(robj *o, double *target) {
double value;
char *eptr;
if (o == NULL) {
value = 0;
} else {
redisAssert(o->type == REDIS_STRING);
if (o->encoding == REDIS_ENCODING_RAW) {
value = strtod(o->ptr, &eptr);
if (eptr[0] != '\0') return REDIS_ERR;
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
redisPanic("Unknown string encoding");
}
}
*target = value;
return REDIS_OK;
}
int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
double value;
if (getDoubleFromObject(o, &value) != REDIS_OK) {
if (msg != NULL) {
addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
} else {
addReplySds(c, sdsnew("-ERR value is not a double\r\n"));
}
return REDIS_ERR;
}
*target = value;
return REDIS_OK;
}
int getLongLongFromObject(robj *o, long long *target) {
long long value;
char *eptr;
if (o == NULL) {
value = 0;
} else {
redisAssert(o->type == REDIS_STRING);
if (o->encoding == REDIS_ENCODING_RAW) {
value = strtoll(o->ptr, &eptr, 10);
if (eptr[0] != '\0') return REDIS_ERR;
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
redisPanic("Unknown string encoding");
}
}
*target = value;
return REDIS_OK;
}
int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
long long value;
if (getLongLongFromObject(o, &value) != REDIS_OK) {
if (msg != NULL) {
addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
} else {
addReplySds(c, sdsnew("-ERR value is not an integer\r\n"));
}
return REDIS_ERR;
}
*target = value;
return REDIS_OK;
}
int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
long long value;
if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
if (value < LONG_MIN || value > LONG_MAX) {
if (msg != NULL) {
addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
} else {
addReplySds(c, sdsnew("-ERR value is out of range\r\n"));
}
return REDIS_ERR;
}
*target = value;
return REDIS_OK;
}
char *strEncoding(int encoding) {
switch(encoding) {
case REDIS_ENCODING_RAW: return "raw";
case REDIS_ENCODING_INT: return "int";
case REDIS_ENCODING_HT: return "hashtable";
case REDIS_ENCODING_ZIPMAP: return "zipmap";
case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
case REDIS_ENCODING_ZIPLIST: return "ziplist";
default: return "unknown";
}
}
#include "redis.h"
void freePubsubPattern(void *p) {
pubsubPattern *pat = p;
decrRefCount(pat->pattern);
zfree(pat);
}
int listMatchPubsubPattern(void *a, void *b) {
pubsubPattern *pa = a, *pb = b;
return (pa->client == pb->client) &&
(equalStringObjects(pa->pattern,pb->pattern));
}
/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
* 0 if the client was already subscribed to that channel. */
int pubsubSubscribeChannel(redisClient *c, robj *channel) {
struct dictEntry *de;
list *clients = NULL;
int retval = 0;
/* Add the channel to the client -> channels hash table */
if (dictAdd(c->pubsub_channels,channel,NULL) == DICT_OK) {
retval = 1;
incrRefCount(channel);
/* Add the client to the channel -> list of clients hash table */
de = dictFind(server.pubsub_channels,channel);
if (de == NULL) {
clients = listCreate();
dictAdd(server.pubsub_channels,channel,clients);
incrRefCount(channel);
} else {
clients = dictGetEntryVal(de);
}
listAddNodeTail(clients,c);
}
/* Notify the client */
addReply(c,shared.mbulk3);
addReply(c,shared.subscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
return retval;
}
/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
* 0 if the client was not subscribed to the specified channel. */
int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
struct dictEntry *de;
list *clients;
listNode *ln;
int retval = 0;
/* Remove the channel from the client -> channels hash table */
incrRefCount(channel); /* channel may be just a pointer to the same object
we have in the hash tables. Protect it... */
if (dictDelete(c->pubsub_channels,channel) == DICT_OK) {
retval = 1;
/* Remove the client from the channel -> clients list hash table */
de = dictFind(server.pubsub_channels,channel);
redisAssert(de != NULL);
clients = dictGetEntryVal(de);
ln = listSearchKey(clients,c);
redisAssert(ln != NULL);
listDelNode(clients,ln);
if (listLength(clients) == 0) {
/* Free the list and associated hash entry at all if this was
* the latest client, so that it will be possible to abuse
* Redis PUBSUB creating millions of channels. */
dictDelete(server.pubsub_channels,channel);
}
}
/* Notify the client */
if (notify) {
addReply(c,shared.mbulk3);
addReply(c,shared.unsubscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
listLength(c->pubsub_patterns));
}
decrRefCount(channel); /* it is finally safe to release it */
return retval;
}
/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
int pubsubSubscribePattern(redisClient *c, robj *pattern) {
int retval = 0;
if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
retval = 1;
pubsubPattern *pat;
listAddNodeTail(c->pubsub_patterns,pattern);
incrRefCount(pattern);
pat = zmalloc(sizeof(*pat));
pat->pattern = getDecodedObject(pattern);
pat->client = c;
listAddNodeTail(server.pubsub_patterns,pat);
}
/* Notify the client */
addReply(c,shared.mbulk3);
addReply(c,shared.psubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
return retval;
}
/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
* 0 if the client was not subscribed to the specified channel. */
int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
listNode *ln;
pubsubPattern pat;
int retval = 0;
incrRefCount(pattern); /* Protect the object. May be the same we remove */
if ((ln = listSearchKey(c->pubsub_patterns,pattern)) != NULL) {
retval = 1;
listDelNode(c->pubsub_patterns,ln);
pat.client = c;
pat.pattern = pattern;
ln = listSearchKey(server.pubsub_patterns,&pat);
listDelNode(server.pubsub_patterns,ln);
}
/* Notify the client */
if (notify) {
addReply(c,shared.mbulk3);
addReply(c,shared.punsubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
listLength(c->pubsub_patterns));
}
decrRefCount(pattern);
return retval;
}
/* Unsubscribe from all the channels. Return the number of channels the
* client was subscribed from. */
int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
dictIterator *di = dictGetIterator(c->pubsub_channels);
dictEntry *de;
int count = 0;
while((de = dictNext(di)) != NULL) {
robj *channel = dictGetEntryKey(de);
count += pubsubUnsubscribeChannel(c,channel,notify);
}
dictReleaseIterator(di);
return count;
}
/* Unsubscribe from all the patterns. Return the number of patterns the
* client was subscribed from. */
int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
listNode *ln;
listIter li;
int count = 0;
listRewind(c->pubsub_patterns,&li);
while ((ln = listNext(&li)) != NULL) {
robj *pattern = ln->value;
count += pubsubUnsubscribePattern(c,pattern,notify);
}
return count;
}
/* Publish a message */
int pubsubPublishMessage(robj *channel, robj *message) {
int receivers = 0;
struct dictEntry *de;
listNode *ln;
listIter li;
/* Send to clients listening for that channel */
de = dictFind(server.pubsub_channels,channel);
if (de) {
list *list = dictGetEntryVal(de);
listNode *ln;
listIter li;
listRewind(list,&li);
while ((ln = listNext(&li)) != NULL) {
redisClient *c = ln->value;
addReply(c,shared.mbulk3);
addReply(c,shared.messagebulk);
addReplyBulk(c,channel);
addReplyBulk(c,message);
receivers++;
}
}
/* Send to clients listening to matching channels */
if (listLength(server.pubsub_patterns)) {
listRewind(server.pubsub_patterns,&li);
channel = getDecodedObject(channel);
while ((ln = listNext(&li)) != NULL) {
pubsubPattern *pat = ln->value;
if (stringmatchlen((char*)pat->pattern->ptr,
sdslen(pat->pattern->ptr),
(char*)channel->ptr,
sdslen(channel->ptr),0)) {
addReply(pat->client,shared.mbulk4);
addReply(pat->client,shared.pmessagebulk);
addReplyBulk(pat->client,pat->pattern);
addReplyBulk(pat->client,channel);
addReplyBulk(pat->client,message);
receivers++;
}
}
decrRefCount(channel);
}
return receivers;
}
void subscribeCommand(redisClient *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribeChannel(c,c->argv[j]);
}
void unsubscribeCommand(redisClient *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllChannels(c,1);
return;
} else {
int j;
for (j = 1; j < c->argc; j++)
pubsubUnsubscribeChannel(c,c->argv[j],1);
}
}
void psubscribeCommand(redisClient *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribePattern(c,c->argv[j]);
}
void punsubscribeCommand(redisClient *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllPatterns(c,1);
return;
} else {
int j;
for (j = 1; j < c->argc; j++)
pubsubUnsubscribePattern(c,c->argv[j],1);
}
}
void publishCommand(redisClient *c) {
int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
addReplyLongLong(c,receivers);
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
#include "redis.h"
#include <sys/time.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int outc = 0, j;
robj **outv;
/* We need 1+(ARGS*3) objects since commands are using the new protocol
* and we one 1 object for the first "*<count>\r\n" multibulk count, then
* for every additional object we have "$<count>\r\n" + object + "\r\n". */
robj *static_outv[REDIS_STATIC_ARGS*3+1];
robj *lenobj;
if (argc <= REDIS_STATIC_ARGS) {
outv = static_outv;
} else {
outv = zmalloc(sizeof(robj*)*(argc*3+1));
}
lenobj = createObject(REDIS_STRING,
sdscatprintf(sdsempty(), "*%d\r\n", argc));
lenobj->refcount = 0;
outv[outc++] = lenobj;
for (j = 0; j < argc; j++) {
lenobj = createObject(REDIS_STRING,
sdscatprintf(sdsempty(),"$%lu\r\n",
(unsigned long) stringObjectLen(argv[j])));
lenobj->refcount = 0;
outv[outc++] = lenobj;
outv[outc++] = argv[j];
outv[outc++] = shared.crlf;
}
/* Increment all the refcounts at start and decrement at end in order to
* be sure to free objects if there is no slave in a replication state
* able to be feed with commands */
for (j = 0; j < outc; j++) incrRefCount(outv[j]);
listRewind(slaves,&li);
while((ln = listNext(&li))) {
redisClient *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start */
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
/* Feed all the other slaves, MONITORs and so on */
if (slave->slaveseldb != dictid) {
robj *selectcmd;
switch(dictid) {
case 0: selectcmd = shared.select0; break;
case 1: selectcmd = shared.select1; break;
case 2: selectcmd = shared.select2; break;
case 3: selectcmd = shared.select3; break;
case 4: selectcmd = shared.select4; break;
case 5: selectcmd = shared.select5; break;
case 6: selectcmd = shared.select6; break;
case 7: selectcmd = shared.select7; break;
case 8: selectcmd = shared.select8; break;
case 9: selectcmd = shared.select9; break;
default:
selectcmd = createObject(REDIS_STRING,
sdscatprintf(sdsempty(),"select %d\r\n",dictid));
selectcmd->refcount = 0;
break;
}
addReply(slave,selectcmd);
slave->slaveseldb = dictid;
}
for (j = 0; j < outc; j++) addReply(slave,outv[j]);
}
for (j = 0; j < outc; j++) decrRefCount(outv[j]);
if (outv != static_outv) zfree(outv);
}
void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int j;
sds cmdrepr = sdsnew("+");
robj *cmdobj;
struct timeval tv;
gettimeofday(&tv,NULL);
cmdrepr = sdscatprintf(cmdrepr,"%ld.%ld ",(long)tv.tv_sec,(long)tv.tv_usec);
if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid);
for (j = 0; j < argc; j++) {
if (argv[j]->encoding == REDIS_ENCODING_INT) {
cmdrepr = sdscatprintf(cmdrepr, "%ld", (long)argv[j]->ptr);
} else {
cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
sdslen(argv[j]->ptr));
}
if (j != argc-1)
cmdrepr = sdscatlen(cmdrepr," ",1);
}
cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
cmdobj = createObject(REDIS_STRING,cmdrepr);
listRewind(monitors,&li);
while((ln = listNext(&li))) {
redisClient *monitor = ln->value;
addReply(monitor,cmdobj);
}
decrRefCount(cmdobj);
}
int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ssize_t nwritten, ret = size;
time_t start = time(NULL);
timeout++;
while(size) {
if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
nwritten = write(fd,ptr,size);
if (nwritten == -1) return -1;
ptr += nwritten;
size -= nwritten;
}
if ((time(NULL)-start) > timeout) {
errno = ETIMEDOUT;
return -1;
}
}
return ret;
}
int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ssize_t nread, totread = 0;
time_t start = time(NULL);
timeout++;
while(size) {
if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
nread = read(fd,ptr,size);
if (nread == -1) return -1;
ptr += nread;
size -= nread;
totread += nread;
}
if ((time(NULL)-start) > timeout) {
errno = ETIMEDOUT;
return -1;
}
}
return totread;
}
int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
ssize_t nread = 0;
size--;
while(size) {
char c;
if (syncRead(fd,&c,1,timeout) == -1) return -1;
if (c == '\n') {
*ptr = '\0';
if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
return nread;
} else {
*ptr++ = c;
*ptr = '\0';
nread++;
}
}
return nread;
}
void syncCommand(redisClient *c) {
/* ignore SYNC if aleady slave or in monitor mode */
if (c->flags & REDIS_SLAVE) return;
/* SYNC can't be issued when the server has pending data to send to
* the client about already issued commands. We need a fresh reply
* buffer registering the differences between the BGSAVE and the current
* dataset, so that we can copy to other slaves if needed. */
if (listLength(c->reply) != 0) {
addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
return;
}
redisLog(REDIS_NOTICE,"Slave ask for synchronization");
/* Here we need to check if there is a background saving operation
* in progress, or if it is required to start one */
if (server.bgsavechildpid != -1) {
/* Ok a background save is in progress. Let's check if it is a good
* one for replication, i.e. if there is another slave that is
* registering differences since the server forked to save */
redisClient *slave;
listNode *ln;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
}
if (ln) {
/* Perfect, the server is already registering differences for
* another slave. Set the right state, and copy the buffer. */
listRelease(c->reply);
c->reply = listDup(slave->reply);
c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
/* No way, we need to wait for the next BGSAVE in order to
* register differences */
c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
}
} else {
/* Ok we don't have a BGSAVE in progress, let's start one */
redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
return;
}
c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
}
c->repldbfd = -1;
c->flags |= REDIS_SLAVE;
c->slaveseldb = 0;
listAddNodeTail(server.slaves,c);
return;
}
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *slave = privdata;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
char buf[REDIS_IOBUF_LEN];
ssize_t nwritten, buflen;
if (slave->repldboff == 0) {
/* Write the bulk write count before to transfer the DB. In theory here
* we don't know how much room there is in the output buffer of the
* socket, but in pratice SO_SNDLOWAT (the minimum count for output
* operations) will never be smaller than the few bytes we need. */
sds bulkcount;
bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
slave->repldbsize);
if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
{
sdsfree(bulkcount);
freeClient(slave);
return;
}
sdsfree(bulkcount);
}
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
if (buflen <= 0) {
redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
(buflen == 0) ? "premature EOF" : strerror(errno));
freeClient(slave);
return;
}
if ((nwritten = write(fd,buf,buflen)) == -1) {
redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
strerror(errno));
freeClient(slave);
return;
}
slave->repldboff += nwritten;
if (slave->repldboff == slave->repldbsize) {
close(slave->repldbfd);
slave->repldbfd = -1;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
slave->replstate = REDIS_REPL_ONLINE;
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
sendReplyToClient, slave) == AE_ERR) {
freeClient(slave);
return;
}
addReplySds(slave,sdsempty());
redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
}
}
/* This function is called at the end of every backgrond saving.
* The argument bgsaveerr is REDIS_OK if the background saving succeeded
* otherwise REDIS_ERR is passed to the function.
*
* The goal of this function is to handle slaves waiting for a successful
* background saving in order to perform non-blocking synchronization. */
void updateSlavesWaitingBgsave(int bgsaveerr) {
listNode *ln;
int startbgsave = 0;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
redisClient *slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
startbgsave = 1;
slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
} else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
struct redis_stat buf;
if (bgsaveerr != REDIS_OK) {
freeClient(slave);
redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
continue;
}
if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
redis_fstat(slave->repldbfd,&buf) == -1) {
freeClient(slave);
redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
continue;
}
slave->repldboff = 0;
slave->repldbsize = buf.st_size;
slave->replstate = REDIS_REPL_SEND_BULK;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
continue;
}
}
}
if (startbgsave) {
if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
listIter li;
listRewind(server.slaves,&li);
redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
while((ln = listNext(&li))) {
redisClient *slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
freeClient(slave);
}
}
}
}
int syncWithMaster(void) {
char buf[1024], tmpfile[256], authcmd[1024];
long dumpsize;
int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
int dfd, maxtries = 5;
if (fd == -1) {
redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
/* AUTH with the master if required. */
if(server.masterauth) {
snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
close(fd);
redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
/* Read the AUTH result. */
if (syncReadLine(fd,buf,1024,3600) == -1) {
close(fd);
redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
if (buf[0] != '+') {
close(fd);
redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
return REDIS_ERR;
}
}
/* Issue the SYNC command */
if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
close(fd);
redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
/* Read the bulk write count */
if (syncReadLine(fd,buf,1024,3600) == -1) {
close(fd);
redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
if (buf[0] != '$') {
close(fd);
redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
return REDIS_ERR;
}
dumpsize = strtol(buf+1,NULL,10);
redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
/* Read the bulk write data on a temp file */
while(maxtries--) {
snprintf(tmpfile,256,
"temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
if (dfd != -1) break;
sleep(1);
}
if (dfd == -1) {
close(fd);
redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
return REDIS_ERR;
}
while(dumpsize) {
int nread, nwritten;
nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
if (nread == -1) {
redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
strerror(errno));
close(fd);
close(dfd);
return REDIS_ERR;
}
nwritten = write(dfd,buf,nread);
if (nwritten == -1) {
redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
close(fd);
close(dfd);
return REDIS_ERR;
}
dumpsize -= nread;
}
close(dfd);
if (rename(tmpfile,server.dbfilename) == -1) {
redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
unlink(tmpfile);
close(fd);
return REDIS_ERR;
}
emptyDb();
if (rdbLoad(server.dbfilename) != REDIS_OK) {
redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
close(fd);
return REDIS_ERR;
}
server.master = createClient(fd);
server.master->flags |= REDIS_MASTER;
server.master->authenticated = 1;
server.replstate = REDIS_REPL_CONNECTED;
return REDIS_OK;
}
void slaveofCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"no") &&
!strcasecmp(c->argv[2]->ptr,"one")) {
if (server.masterhost) {
sdsfree(server.masterhost);
server.masterhost = NULL;
if (server.master) freeClient(server.master);
server.replstate = REDIS_REPL_NONE;
redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
}
} else {
sdsfree(server.masterhost);
server.masterhost = sdsdup(c->argv[1]->ptr);
server.masterport = atoi(c->argv[2]->ptr);
if (server.master) freeClient(server.master);
server.replstate = REDIS_REPL_CONNECT;
redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
server.masterhost, server.masterport);
}
addReply(c,shared.ok);
}
......@@ -201,7 +201,7 @@ sds sdstrim(sds s, const char *cset) {
return s;
}
sds sdsrange(sds s, long start, long end) {
sds sdsrange(sds s, int start, int end) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
......@@ -357,3 +357,28 @@ sds sdsfromlonglong(long long value) {
p++;
return sdsnewlen(p,32-(p-buf));
}
sds sdscatrepr(sds s, char *p, size_t len) {
s = sdscatlen(s,"\"",1);
while(len--) {
switch(*p) {
case '\\':
case '"':
s = sdscatprintf(s,"\\%c",*p);
break;
case '\n': s = sdscatlen(s,"\\n",1); break;
case '\r': s = sdscatlen(s,"\\r",1); break;
case '\t': s = sdscatlen(s,"\\t",1); break;
case '\a': s = sdscatlen(s,"\\a",1); break;
case '\b': s = sdscatlen(s,"\\b",1); break;
default:
if (isprint(*p))
s = sdscatprintf(s,"%c",*p);
else
s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
break;
}
p++;
}
return sdscatlen(s,"\"",1);
}
......@@ -36,8 +36,8 @@
typedef char *sds;
struct sdshdr {
long len;
long free;
int len;
int free;
char buf[];
};
......@@ -61,7 +61,7 @@ sds sdscatprintf(sds s, const char *fmt, ...);
#endif
sds sdstrim(sds s, const char *cset);
sds sdsrange(sds s, long start, long end);
sds sdsrange(sds s, int start, int end);
void sdsupdatelen(sds s);
int sdscmp(sds s1, sds s2);
sds *sdssplitlen(char *s, int len, char *sep, int seplen, int *count);
......@@ -69,5 +69,6 @@ void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
sds sdscatrepr(sds s, char *p, size_t len);
#endif
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
#define REDIS_VERSION "2.1.1"
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册