diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 48d3489a2adafff8ea20a5320fddd5d4be1c7b8c..aebed3e747701144cf114303e9337cbe5cde3b88 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -51,14 +51,35 @@ * * [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The * analysis of a near-optimal cardinality estimation algorithm. - */ + * + * The representation used by Redis is the following: + * + * +--------+--------+--------+------// //--+---------------------+ + * |11000000|22221111|33333322|55444444 .... | 64 bit cardinality | + * +--------+--------+--------+------// //--+---------------------+ + * + * The 6-bit counters are encoded one after the other starting from the + * LSB to the MSB, and using the next bytes as needed. + * + * At the end of the 16k counters, there is an additional 64-bit integer + * stored in little-endian format with the latest cardinality computed that + * can be reused if the data structure was not modified since the last + * computation (this is useful because there is a high probability that + * HLLADD operations don't modify the actual data structure and hence the + * approximated cardinality). + * + * When the most significant bit in the most significant byte of the cached + * cardinality is set, it means that the data structure was modified and + * we can't reuse the cached value, which must be recomputed. */ #define REDIS_HLL_P 14 /* The greater is P, the smaller the error. */ #define REDIS_HLL_REGISTERS (1<db,c->argv[1]); notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"hlladd",c->argv[1],c->db->id); server.dirty++; + /* Invalidate the cached cardinality. */ + registers[REDIS_HLL_SIZE-1] |= (1<<7); } addReply(c, updated ? shared.cone : shared.czero); } @@ -447,6 +470,7 @@ void hllAddCommand(redisClient *c) { void hllCountCommand(redisClient *c) { robj *o = lookupKeyRead(c->db,c->argv[1]); uint8_t *registers; + uint64_t card; if (o == NULL) { /* No key? 
Cardinality is zero since no element was added, otherwise @@ -465,8 +489,32 @@ void hllCountCommand(redisClient *c) { REDIS_HLL_SIZE); return; } + + /* Check if the cached cardinality is valid. */ registers = o->ptr; - addReplyLongLong(c,hllCount(registers)); + if ((registers[REDIS_HLL_SIZE-1] & (1<<7)) == 0) { + /* Just return the cached value. */ + card = (uint64_t)registers[REDIS_HLL_SIZE-8]; + card |= (uint64_t)registers[REDIS_HLL_SIZE-7] << 8; + card |= (uint64_t)registers[REDIS_HLL_SIZE-6] << 16; + card |= (uint64_t)registers[REDIS_HLL_SIZE-5] << 24; + card |= (uint64_t)registers[REDIS_HLL_SIZE-4] << 32; + card |= (uint64_t)registers[REDIS_HLL_SIZE-3] << 40; + card |= (uint64_t)registers[REDIS_HLL_SIZE-2] << 48; + card |= (uint64_t)registers[REDIS_HLL_SIZE-1] << 56; + } else { + /* Recompute it and update the cached value. */ + card = hllCount(registers); + registers[REDIS_HLL_SIZE-8] = card & 0xff; + registers[REDIS_HLL_SIZE-7] = (card >> 8) & 0xff; + registers[REDIS_HLL_SIZE-6] = (card >> 16) & 0xff; + registers[REDIS_HLL_SIZE-5] = (card >> 24) & 0xff; + registers[REDIS_HLL_SIZE-4] = (card >> 32) & 0xff; + registers[REDIS_HLL_SIZE-3] = (card >> 40) & 0xff; + registers[REDIS_HLL_SIZE-2] = (card >> 48) & 0xff; + registers[REDIS_HLL_SIZE-1] = (card >> 56) & 0xff; + } + addReplyLongLong(c,card); } }