Skip to content

Commit 5b407e1

Browse files
[In-Progress] Add per tick memory stats to redisai_vision runner. (#61)
* [add] Added per-tick memory stats to the redisai_vision runner, exporting that info in the results JSON * [add] Added script to post-process server start results
1 parent f2f6f14 commit 5b407e1

3 files changed

Lines changed: 133 additions & 1 deletion

File tree

cmd/aibench_run_inference_redisai_vision/main.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,12 +144,14 @@ func (p *Processor) CollectRunTimeMetrics() (ts int64, stats interface{}, err er
144144
var aicpu_rcv string
145145
var aiinfo_rcv []string
146146
var commandstats_rcv string
147+
var infomemory_rcv string
147148
var kvmap = make(map[string]interface{})
148149
pipeCmds := radix.Pipeline(
149150
radix.FlatCmd(&aicpu_rcv, "INFO", "MODULES"),
150151
radix.FlatCmd(&aiinfo_rcv, "AI.INFO", model),
151152
radix.FlatCmd(nil, "AI.INFO", model, "RESETSTAT"),
152153
radix.FlatCmd(&commandstats_rcv, "INFO", "COMMANDSTATS"),
154+
radix.FlatCmd(&infomemory_rcv, "INFO", "MEMORY"),
153155
radix.FlatCmd(nil, "CONFIG", "RESETSTAT"),
154156
)
155157
err = h.Do(pipeCmds)
@@ -158,6 +160,7 @@ func (p *Processor) CollectRunTimeMetrics() (ts int64, stats interface{}, err er
158160
}
159161
process_ainfo_reply(aiinfo_rcv, kvmap)
160162
process_commandstats_reply(commandstats_rcv, kvmap)
163+
process_memorystats_reply(infomemory_rcv, kvmap)
161164
process_info_modules_ai_cpu(aicpu_rcv, kvmap)
162165
hosts_metrics_map[metricsHosts[pos]] = kvmap
163166
}
@@ -203,6 +206,23 @@ func process_commandstats_reply(commandstats_rcv string, kvmap map[string]interf
203206
}
204207
}
205208

209+
// process_memorystats_reply parses the text of a Redis "INFO MEMORY" reply and
// stores every "key:value" line it contains in kvmap, with the value kept as a
// string.
//
// commandstats_rcv is the raw reply text (despite its name, the caller passes
// the INFO MEMORY reply here). Parsing starts at the first occurrence of
// "Memory"; the rest of that line is treated as the section header and
// skipped. Lines are expected to be separated by "\r\n". Lines without a ':'
// are ignored. If "Memory" does not occur in the reply, kvmap is left
// untouched.
func process_memorystats_reply(commandstats_rcv string, kvmap map[string]interface{}) {
	sectionIdx := strings.Index(commandstats_rcv, "Memory")
	if sectionIdx < 0 {
		return
	}
	// Element 0 is the remainder of the header line, so drop it.
	lines := strings.Split(commandstats_rcv[sectionIdx:], "\r\n")[1:]
	for _, line := range lines {
		// Split on the first ':' only, so a value that itself contains ':'
		// is kept whole instead of the whole line being discarded.
		kv := strings.SplitN(line, ":", 2)
		if len(kv) != 2 {
			continue
		}
		kvmap[kv[0]] = kv[1]
	}
}
225+
206226
func process_info_modules_ai_cpu(rcv string, kvmap map[string]interface{}) {
207227
ai_cpu_idx := strings.Index(rcv, "ai_cpu")
208228
if ai_cpu_idx > -1 {

scripts/redisai_common.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ DATA_FILE=${DATA_FILE:-${BULK_DATA_DIR}/${DATA_FILE_NAME}}
4646
INPUT_VISION_VAL_DIR=${INPUT_VISION_VAL_DIR:-datasets/vision/coco-2017-val/cropped-val2017/.}
4747
OUTPUT_VISION_FILE_NAME=${OUTPUT_VISION_FILE_NAME:-${BULK_DATA_DIR}/vision_tensors.out}
4848
NUM_VISION_INFERENCES=${NUM_VISION_INFERENCES:-11000}
49-
VISION_QUERIES_BURN_IN=${VISION_QUERIES_BURN_IN:-1000}
49+
VISION_QUERIES_BURN_IN=${VISION_QUERIES_BURN_IN:-100}
5050
VISION_IMAGE_REUSE_FACTOR=${VISION_IMAGE_REUSE_FACTOR:-1}
5151

5252
# How many concurrent workers - match num of cores, or default to 8
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import argparse
2+
import json
3+
import os
4+
5+
6+
def process_json_files(dirname: str, prefix: str = ""):
    """Collect throughput and latency datapoints from the result JSON files in a directory.

    Every entry of ``dirname`` whose name ends in ``.json`` and contains
    ``prefix`` (a substring match; the empty string matches everything) is
    loaded. From each file the keys ``Workers``, ``MetadataAutobatching``,
    ``TensorBatchSize``, ``OverallRates.overallOpsRate`` and
    ``OverallQuantiles.AllQueries.q50`` are read.

    Files with ``TensorBatchSize == 1`` feed the auto-batching tables; files
    with ``MetadataAutobatching == 0`` feed the tensor-batching tables. A file
    satisfying both conditions feeds both.

    Returns a 7-tuple:
        (workers_arr, autobatching_arr, workers_autobatching_table_rps,
         workers_autobatching_table_p50, tensorbatching_arr,
         workers_tensorbatching_table_rps, workers_tensorbatching_table_p50)
    where the three ``*_arr`` lists are sorted and the tables are the nested
    dicts filled in by ``process_table_datapoint``.

    Raises whatever ``os.listdir``, ``open`` or ``json.load`` raise, and
    ``KeyError`` when a result file lacks one of the expected keys.
    """
    workers_arr = []
    autobatching_arr = []
    tensorbatching_arr = []
    workers_autobatching_table_p50 = {}
    workers_autobatching_table_rps = {}
    workers_tensorbatching_table_p50 = {}
    workers_tensorbatching_table_rps = {}

    for fname in os.listdir(dirname):
        # Match on the real extension: a plain substring test would also pick
        # up names such as "x.json.bak" and then fail while parsing them.
        if not fname.endswith(".json") or prefix not in fname:
            continue
        full_fname = os.path.join(dirname, fname)
        with open(full_fname) as json_file:
            dd = json.load(json_file)

        workers = dd["Workers"]
        autobatching = dd["MetadataAutobatching"]
        tensorbatching = dd["TensorBatchSize"]
        rps = dd["OverallRates"]["overallOpsRate"]
        p50 = dd["OverallQuantiles"]["AllQueries"]["q50"]

        # we fix the tensor batch size to 1 for autobatching
        if tensorbatching == 1:
            process_table_datapoint(autobatching, autobatching_arr, p50, workers, workers_arr,
                                    workers_autobatching_table_p50, full_fname)
            process_table_datapoint(autobatching, autobatching_arr, rps, workers, workers_arr,
                                    workers_autobatching_table_rps, full_fname)
        # we fix autobatching to 0 when doing tensor batching
        if autobatching == 0:
            process_table_datapoint(tensorbatching, tensorbatching_arr, p50, workers, workers_arr,
                                    workers_tensorbatching_table_p50, full_fname)
            process_table_datapoint(tensorbatching, tensorbatching_arr, rps, workers, workers_arr,
                                    workers_tensorbatching_table_rps, full_fname)

    workers_arr.sort()
    autobatching_arr.sort()
    tensorbatching_arr.sort()
    return (workers_arr, autobatching_arr, workers_autobatching_table_rps, workers_autobatching_table_p50,
            tensorbatching_arr, workers_tensorbatching_table_rps, workers_tensorbatching_table_p50)
43+
44+
45+
def process_table_datapoint(metric_key, metric_arr, metric_value, workers, workers_arr, table, fname):
    """Record one datapoint in ``table`` and register its row/column keys.

    ``workers`` is appended to ``workers_arr`` and ``metric_key`` to
    ``metric_arr`` when not already present. ``table[workers]`` holds two
    parallel lists per metric key: the values under ``metric_key`` and the
    originating file names under ``"<metric_key>-fname"``. All containers are
    modified in place; nothing is returned.
    """
    fname_key = "{}-fname".format(metric_key)

    # Keep track of every distinct worker count / metric key seen so far.
    for seen, item in ((workers_arr, workers), (metric_arr, metric_key)):
        if item not in seen:
            seen.append(item)

    row = table.setdefault(workers, {})
    if metric_key not in row:
        row[metric_key] = []
        row[fname_key] = []

    # Same index in both lists identifies the value and the file it came from.
    row[metric_key].append(metric_value)
    row[fname_key].append(fname)
58+
59+
60+
def print_results_table(workers_arr, metric_arr, metric_table, metric_str, functor=min,
                        print_last_server_runtime_stats=True, server_runtime_stats_metricname="used_memory_human"):
    """Print a CSV table with one row per worker count and one column per metric key.

    Args:
        workers_arr: worker counts, one output row each.
        metric_arr: metric keys, one output column each.
        metric_table: ``{workers: {metric_key: [values],
            "<metric_key>-fname": [file names]}}`` with the two lists parallel.
        metric_str: label used as the prefix of every column header.
        functor: picks the "best" value out of a cell's list of values
            (e.g. ``min`` or ``max``).
        print_last_server_runtime_stats: when true, the cell does not show the
            best value itself; instead the result file that produced it is
            opened and ``server_runtime_stats_metricname`` is read from the
            last entry of its ``ServerRunTimeStats`` mapping, for the first
            host listed there.
        server_runtime_stats_metricname: name of the runtime stat to report.

    Cells with no data print ``n/a``. That covers a worker count or metric key
    absent from ``metric_table`` as well as result files whose runtime stats,
    host list or requested metric are missing or empty.
    """
    header = ",".join("{} {}".format(metric_str, x) for x in metric_arr)
    print("Workers,{}".format(header))
    for workersN in workers_arr:
        # The same workers_arr is used with different tables, so a worker
        # count may legitimately have no row in this particular table.
        row = metric_table.get(workersN, {})
        line = ["{} workers".format(workersN)]
        for metric_key in metric_arr:
            v = "n/a"
            if metric_key in row:
                values = row[metric_key]
                v = functor(values)
                if print_last_server_runtime_stats:
                    # The file-name list is parallel to the value list.
                    fname = row["{}-fname".format(metric_key)][values.index(v)]
                    with open(fname) as json_file:
                        dd = json.load(json_file)
                    runtime_stats_metric = "n/a"
                    server_runtime_stats = dd.get("ServerRunTimeStats") or {}
                    if server_runtime_stats:
                        # "Last" means last in the file's key order.
                        last_stat_key = list(server_runtime_stats.keys())[-1]
                        hosts = server_runtime_stats[last_stat_key] or {}
                        if hosts:
                            first_host = next(iter(hosts))
                            host_stats = hosts[first_host] or {}
                            runtime_stats_metric = host_stats.get(server_runtime_stats_metricname, "n/a")
                    v = '{}'.format(runtime_stats_metric)
            line.append(v)
        print(",".join(str(x) for x in line))
87+
88+
89+
# Command-line entry point: parse the options, aggregate the result files and
# print one table per batching variation.
parser = argparse.ArgumentParser(
    description="Simple script to process RedisAI results JSON and output overall metrics",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("--dir", type=str, required=True)
parser.add_argument("--prefix", type=str, default="", help="prefix to filter the result files by")
parser.add_argument(
    "--server_runtime_stats_metricname",
    type=str,
    default="used_memory_human",
    help="The server runtime stat metric to extract from the last available datapoint per test",
)
args = parser.parse_args()

(
    workers_arr,
    autobatching_arr,
    workers_autobatching_table_rps,
    workers_autobatching_table_p50,
    tensorbatching_arr,
    workers_tensorbatching_table_rps,
    workers_tensorbatching_table_p50,
) = process_json_files(args.dir, args.prefix)

separator = "-------------------"
print(separator)
print("Using the Overall inferences/sec to decide which result is the best per test variation")

# Both variations are ranked by inferences/sec (max) and report the requested
# server runtime stat of the best run.
variations = (
    ("## Auto-batching {} variation", "Auto-batching", autobatching_arr, workers_autobatching_table_rps),
    ("## Tensor-batching {} variation", "Tensor-batching", tensorbatching_arr, workers_tensorbatching_table_rps),
)
for heading, label, metric_arr, rps_table in variations:
    print(separator)
    print(heading.format(args.server_runtime_stats_metricname))
    print_results_table(workers_arr, metric_arr, rps_table, label, max, True,
                        args.server_runtime_stats_metricname)
    print("")

0 commit comments

Comments
 (0)