提交 cf34507b 编写于 作者: A antirez

hll-gnuplot-graph.rb improved with new filter.

The function that generates the graphs is also more flexible, as it now
takes the step and the max value as arguments. The step of the sample
generation function is no longer limited to a minimum step of 1000.
上级 096b5e92
...@@ -10,7 +10,7 @@ require 'redis' ...@@ -10,7 +10,7 @@ require 'redis'
require 'digest/sha1' require 'digest/sha1'
# Generate an array of [cardinality,relative_error] pairs # Generate an array of [cardinality,relative_error] pairs
# in the 0 - max range with step of 1000*step. # in the 0 - max range, with the specified step.
# #
# 'r' is the Redis object used to perform the queries. # 'r' is the Redis object used to perform the queries.
# 'seed' must be different every time you want a test performed # 'seed' must be different every time you want a test performed
...@@ -22,16 +22,15 @@ def run_experiment(r,seed,max,step) ...@@ -22,16 +22,15 @@ def run_experiment(r,seed,max,step)
r.del('hll') r.del('hll')
i = 0 i = 0
samples = [] samples = []
step = 1000 if step > 1000
while i < max do while i < max do
elements = []
step.times { step.times {
elements = [] ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
1000.times { elements << ele
ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) i += 1
elements << ele
i += 1
}
r.pfadd('hll',*elements)
} }
r.pfadd('hll',*elements)
approx = r.pfcount('hll') approx = r.pfcount('hll')
err = approx-i err = approx-i
rel_err = 100.to_f*err/i rel_err = 100.to_f*err/i
...@@ -40,11 +39,12 @@ def run_experiment(r,seed,max,step) ...@@ -40,11 +39,12 @@ def run_experiment(r,seed,max,step)
samples samples
end end
def filter_samples(numsets,filter) def filter_samples(numsets,max,step,filter)
r = Redis.new r = Redis.new
dataset = {} dataset = {}
(0...numsets).each{|i| (0...numsets).each{|i|
dataset[i] = run_experiment(r,i,100000,1) dataset[i] = run_experiment(r,i,max,step)
STDERR.puts "Set #{i}"
} }
dataset[0].each_with_index{|ele,index| dataset[0].each_with_index{|ele,index|
if filter == :max if filter == :max
...@@ -62,6 +62,14 @@ def filter_samples(numsets,filter) ...@@ -62,6 +62,14 @@ def filter_samples(numsets,filter)
} }
err /= numsets err /= numsets
puts "#{card} #{err}" puts "#{card} #{err}"
elsif filter == :absavg
card=ele[0]
err = 0
(0...numsets).each{|i|
err += dataset[i][index][1].abs
}
err /= numsets
puts "#{card} #{err}"
elsif filter == :all elsif filter == :all
(0...numsets).each{|i| (0...numsets).each{|i|
card,err = dataset[i][index] card,err = dataset[i][index]
...@@ -73,6 +81,7 @@ def filter_samples(numsets,filter) ...@@ -73,6 +81,7 @@ def filter_samples(numsets,filter)
} }
end end
filter_samples(100,:all) filter_samples(100,100000,1000,:absavg)
#filter_samples(100,:max) #filter_samples(100,1000,10,:all)
#filter_samples(100,:avg) #filter_samples(100,10000,1000,:max)
#filter_samples(100,10000,1000,:avg)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册