Add more checks to ddsperf and run on Travis

This adds options to check for "unreasonable" RSS growth, receipt of a
minimum number of samples and having run a minimum number of roundtrips.

Signed-off-by: Erik Boasson <eb@ilities.com>
This commit is contained in:
Erik Boasson 2020-02-11 12:50:00 +01:00 committed by eboasson
parent 701c6f5a5c
commit 8d6b308199
5 changed files with 126 additions and 12 deletions

View file

@ -184,6 +184,11 @@ bool record_cputime (struct record_cputime_state *state, const char *prefix, dds
return print_cputime (&state->s, prefix, false, true);
}
/* Accessor for the RSS figure kept in STATE: hands back the maxrss member of
   the statistics embedded in the state, converted to double.
   NOTE(review): unit of maxrss is not visible here -- confirm against the
   code that fills it in. */
double record_cputime_read_rss (const struct record_cputime_state *state)
{
  const double maxrss = state->s.maxrss;
  return maxrss;
}
struct record_cputime_state *record_cputime_new (dds_entity_t wr)
{
ddsrt_thread_list_id_t tids[100];
@ -251,6 +256,12 @@ bool record_cputime (struct record_cputime_state *state, const char *prefix, dds
(void) tnow;
}
/* Variant of the RSS accessor that does not consult any statistics: STATE is
   ignored and the reported RSS is always 0.0. */
double record_cputime_read_rss (const struct record_cputime_state *state)
{
  (void) state; /* deliberately unused in this variant */
  return 0.0;
}
struct record_cputime_state *record_cputime_new (dds_entity_t wr)
{
(void) wr;

View file

@ -19,6 +19,7 @@ struct record_cputime_state;
struct record_cputime_state *record_cputime_new (dds_entity_t wr);
void record_cputime_free (struct record_cputime_state *state);
bool record_cputime (struct record_cputime_state *state, const char *prefix, dds_time_t tnow);
/* Returns the RSS figure held in STATE as a double (the stored maxrss value;
   the variant of the implementation that ignores STATE always returns 0.0) */
double record_cputime_read_rss (const struct record_cputime_state *state);
bool print_cputime (const struct CPUStats *s, const char *prefix, bool print_host, bool is_fresh);
#endif

View file

@ -151,6 +151,18 @@ static dds_duration_t ping_intv;
pongs had been received */
static uint32_t ping_timeouts = 0;
/* Maximum allowed increase in RSS between the initial RSS sample and
   the final RSS sample: the run fails the check unless
     final < init * rss_factor + rss_term
   "-Qrss:X%" stores 1 + X/100 in rss_factor, "-Qrss:X" stores X MB
   (X * 1048576) in rss_term.  rss_factor must default to 1 (not 0):
   otherwise specifying only the absolute term would compare the final
   RSS against rss_term alone instead of against init + rss_term. */
static bool rss_check = false;
static double rss_factor = 1.0;
static double rss_term = 0;
/* Minimum number of samples, minimum number of roundtrips to
   declare the run a success (0 = no requirement) */
static uint64_t min_received = 0;
static uint64_t min_roundtrips = 0;
static ddsrt_mutex_t disc_lock;
/* Publisher statistics and lock protecting it */
@ -207,6 +219,7 @@ struct subthread_arg_pongstat {
uint64_t min, max;
uint64_t sum;
uint32_t cnt;
uint64_t totcnt;
uint64_t *raw;
};
@ -733,6 +746,7 @@ static bool update_roundtrip (dds_instance_handle_t pubhandle, uint64_t tdelta,
if (x->cnt < PINGPONG_RAWSIZE)
x->raw[x->cnt] = tdelta;
x->cnt++;
x->totcnt++;
ddsrt_mutex_unlock (&pongstat_lock);
return allseen;
}
@ -742,6 +756,7 @@ static bool update_roundtrip (dds_instance_handle_t pubhandle, uint64_t tdelta,
x->pphandle = get_pphandle_for_pubhandle (pubhandle);
x->min = x->max = x->sum = tdelta;
x->cnt = 1;
x->totcnt = 1;
x->raw = malloc (PINGPONG_RAWSIZE * sizeof (*x->raw));
x->raw[0] = tdelta;
npongstat++;
@ -1343,7 +1358,7 @@ static int cmp_uint64 (const void *va, const void *vb)
return (*a == *b) ? 0 : (*a < *b) ? -1 : 1;
}
static void print_stats (dds_time_t tref, dds_time_t tnow, dds_time_t tprev, struct record_cputime_state *cputime_state, struct record_netload_state *netload_state)
static bool print_stats (dds_time_t tref, dds_time_t tnow, dds_time_t tprev, struct record_cputime_state *cputime_state, struct record_netload_state *netload_state)
{
char prefix[128];
const double ts = (double) (tnow - tref) / 1e9;
@ -1469,6 +1484,7 @@ static void print_stats (dds_time_t tref, dds_time_t tnow, dds_time_t tprev, str
if (output)
record_netload (netload_state, prefix, tnow);
fflush (stdout);
return output;
}
static void subthread_arg_init (struct subthread_arg *arg, dds_entity_t rd, uint32_t max_samples)
@ -1555,8 +1571,14 @@ OPTIONS:\n\
-d DEV:BW report network load for device DEV with nominal\n\
bandwidth BW in bits/s (e.g., eth0:1e9)\n\
-D DUR run for at most DUR seconds\n\
-N COUNT require at least COUNT matching participants\n\
-M DUR require those participants to match within DUR seconds\n\
-Q KEY:VAL set success criteria\n\
rss:X%% max allowed increase in RSS, in %%\n\
rss:X max allowed increase in RSS, in MB\n\
samples:N min received messages by \"sub\"\n\
roundtrips:N min roundtrips for \"pong\"\n\
minmatch:N require >= N matching participants\n\
maxwait:DUR require those participants to match\n\
within DUR seconds\n\
-R TREF timestamps in the output relative to TREF instead of\n\
process start\n\
-i ID use domain ID instead of the default domain\n\
@ -1867,7 +1889,7 @@ int main (int argc, char *argv[])
argv0 = argv[0];
while ((opt = getopt (argc, argv, "cd:D:i:n:k:uLK:T:M:N:R:h")) != EOF)
while ((opt = getopt (argc, argv, "cd:D:i:n:k:uLK:T:Q:R:h")) != EOF)
{
switch (opt)
{
@ -1897,11 +1919,28 @@ int main (int argc, char *argv[])
else if (strcmp (optarg, "UK1024") == 0) topicsel = UK1024;
else error3 ("%s: unknown topic\n", optarg);
break;
case 'M': maxwait = atof (optarg); if (maxwait <= 0) maxwait = HUGE_VAL; break;
case 'N': minmatch = (unsigned) atoi (optarg); break;
case 'Q': {
int pos;
double d;
unsigned long n;
if (sscanf (optarg, "rss:%lf%n", &d, &pos) == 1 && (optarg[pos] == 0 || optarg[pos] == '%')) {
if (optarg[pos] == 0) rss_term = d * 1048576.0; else rss_factor = 1.0 + d / 100.0;
rss_check = true;
} else if (sscanf (optarg, "samples:%lu%n", &n, &pos) == 1 && optarg[pos] == 0) {
min_received = (uint64_t) n;
} else if (sscanf (optarg, "roundtrips:%lu%n", &n, &pos) == 1 && optarg[pos] == 0) {
min_roundtrips = (uint64_t) n;
} else if (sscanf (optarg, "maxwait:%lf%n", &maxwait, &pos) == 1 && optarg[pos] == 0) {
maxwait = (maxwait <= 0) ? HUGE_VAL : maxwait;
} else if (sscanf (optarg, "minmatch:%lu%n", &n, &pos) == 1 && optarg[pos] == 0) {
minmatch = (uint32_t) n;
} else {
error3 ("-Q%s: invalid success criterium\n", optarg);
}
break;
}
case 'R': tref = 0; sscanf (optarg, "%"SCNd64, &tref); break;
case 'h': usage (); break;
default: error3 ("-%c: unknown option\n", opt); break;
case 'h': default: usage (); break;
}
}
@ -2006,11 +2045,17 @@ int main (int argc, char *argv[])
/* participants reader must exist before the "publication matched" or "subscription matched"
listener is invoked, or it won't be able to get the details (FIXME: even the DDS spec
has convenience functions for that ...) */
if ((rd_participants = dds_create_reader (dp, DDS_BUILTIN_TOPIC_DCPSPARTICIPANT, NULL, NULL)) < 0)
error2 ("dds_create_reader(participants) failed: %d\n", (int) rd_participants);
/* set listener later: DATA_AVAILABLE still has the nasty habit of potentially triggering
before the reader is accessible to the application via its handle */
listener = dds_create_listener (NULL);
dds_lset_data_available (listener, participant_data_listener);
if ((rd_participants = dds_create_reader (dp, DDS_BUILTIN_TOPIC_DCPSPARTICIPANT, NULL, listener)) < 0)
error2 ("dds_create_reader(participants) failed: %d\n", (int) rd_participants);
dds_set_listener (rd_participants, listener);
dds_delete_listener (listener);
/* then there is the matter of data arriving prior to setting the listener ... this state
of affairs is undoubtedly a bug */
participant_data_listener (rd_participants, NULL);
if ((rd_subscriptions = dds_create_reader (dp, DDS_BUILTIN_TOPIC_DCPSSUBSCRIPTION, NULL, NULL)) < 0)
error2 ("dds_create_reader(subscriptions) failed: %d\n", (int) rd_subscriptions);
if ((rd_publications = dds_create_reader (dp, DDS_BUILTIN_TOPIC_DCPSPUBLICATION, NULL, NULL)) < 0)
@ -2155,7 +2200,7 @@ int main (int argc, char *argv[])
const bool pingpong_waitset = (ping_intv != DDS_NEVER && ignorelocal == DDS_IGNORELOCAL_NONE) || pingpongmode == SM_WAITSET;
if (pingpong_waitset)
{
ddsrt_thread_create (&subpingtid, "ping", &attr, subpingthread_waitset, &subarg_pong);
ddsrt_thread_create (&subpingtid, "ping", &attr, subpingthread_waitset, &subarg_ping);
ddsrt_thread_create (&subpongtid, "pong", &attr, subpongthread_waitset, &subarg_pong);
}
else
@ -2179,6 +2224,7 @@ int main (int argc, char *argv[])
dds_time_t tnext = tstart + DDS_SECS (1);
dds_time_t tlast = tstart;
dds_time_t tnextping = (ping_intv == DDS_INFINITY) ? DDS_NEVER : (ping_intv == 0) ? tstart + DDS_SECS (1) : tstart + ping_intv;
double rss_init = 0.0, rss_final = 0.0;
while (!ddsrt_atomic_ld32 (&termflag) && tnow < tstop)
{
dds_time_t twakeup = DDS_NEVER;
@ -2244,12 +2290,17 @@ int main (int argc, char *argv[])
tnow = dds_time ();
if (tnext <= tnow)
{
print_stats (tref, tnow, tlast, cputime_state, netload_state);
bool output;
output = print_stats (tref, tnow, tlast, cputime_state, netload_state);
tlast = tnow;
if (tnow > tnext + DDS_MSECS (500))
tnext = tnow + DDS_SECS (1);
else
tnext += DDS_SECS (1);
if (rss_init == 0.0 && matchcount >= minmatch && output)
rss_init = record_cputime_read_rss (cputime_state);
rss_final = record_cputime_read_rss (cputime_state);
}
/* If a "real" ping doesn't result in the expected number of pongs within a reasonable
@ -2317,8 +2368,13 @@ int main (int argc, char *argv[])
dds_delete (rd_data);
uint64_t nlost = 0;
bool received_ok = true;
for (uint32_t i = 0; i < eseq_admin.nph; i++)
{
nlost += eseq_admin.stats[i].nlost;
if (eseq_admin.stats[i].nrecv < (uint64_t) min_received)
received_ok = false;
}
fini_eseq_admin (&eseq_admin);
subthread_arg_fini (&subarg_data);
subthread_arg_fini (&subarg_ping);
@ -2330,8 +2386,13 @@ int main (int argc, char *argv[])
ddsrt_mutex_destroy (&pubstat_lock);
hist_free (pubstat_hist);
free (pongwr);
bool roundtrips_ok = true;
for (uint32_t i = 0; i < npongstat; i++)
{
if (pongstat[i].totcnt < min_roundtrips)
roundtrips_ok = false;
free (pongstat[i].raw);
}
free (pongstat);
bool ok = true;
@ -2360,5 +2421,20 @@ int main (int argc, char *argv[])
printf ("[%"PRIdPID"] error: %"PRIu64" samples lost\n", ddsrt_getpid (), nlost);
ok = false;
}
if (!roundtrips_ok)
{
printf ("[%"PRIdPID"] error: too few roundtrips for some peers\n", ddsrt_getpid ());
ok = false;
}
if (!received_ok)
{
printf ("[%"PRIdPID"] error: too few samples received from some peers\n", ddsrt_getpid ());
ok = false;
}
if (rss_check && rss_final >= rss_init * rss_factor + rss_term)
{
printf ("[%"PRIdPID"] error: RSS grew too much (%f -> %f)\n", ddsrt_getpid (), rss_init, rss_final);
ok = false;
}
return ok ? 0 : 1;
}

23
src/tools/ddsperf/sanity.bash Executable file
View file

@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Sanity check for ddsperf: start a "sub ping" instance and a "pub" instance
# side by side, each limited to 10 s (-D10) and each with success criteria
# (-Q...), then report the highest exit status of the two as the result.
# The script relies on bash features ([[ ]], arrays), hence the shebang.

exitcode=0

# RSS/samples/roundtrip numbers are based on experimentation on Travis
bin/ddsperf -L -D10 -n10 -Qminmatch:2 -Qrss:10% -Qrss:0.5 -Qsamples:300000 -Qroundtrips:3000 sub ping &
ddsperf_pids=("$!")
bin/ddsperf -L -D10 -n10 -Qminmatch:2 -Qrss:10% -Qrss:0.5 pub &
ddsperf_pids+=("$!")

# Give both processes their 10 s run time plus one second to shut down
sleep 11

for pid in "${ddsperf_pids[@]}" ; do
  # A process that is still alive at this point overran its time limit:
  # kill it hard and make sure the script reports a failure
  if kill -0 "$pid" 2>/dev/null ; then
    echo "killing process $pid"
    kill -9 "$pid"
    exitcode=2
  fi
  # Collect the exit status and keep the largest one seen so far
  wait "$pid"
  x=$?
  if [[ $x -gt $exitcode ]] ; then
    exitcode=$x
  fi
done

if [[ $exitcode -gt 0 ]] ; then
  echo "** FAILED **"
else
  echo "** OK **"
fi
exit "$exitcode"