From a4772a892ad37113df4767236aa344180ba88382 Mon Sep 17 00:00:00 2001 From: Fabio Salvini Date: Tue, 27 Jun 2017 23:18:58 +0200 Subject: [PATCH] Sources and README --- README.md | 51 +++++++++++++- src/benchmarker.erl | 107 +++++++++++++++++++++++++++++ src/partition.erl | 16 +++++ src/requestWorker.erl | 37 ++++++++++ src/requester.erl | 153 ++++++++++++++++++++++++++++++++++++++++++ src/resources.erl | 43 ++++++++++++ src/shuffle.erl | 17 +++++ src/statistician.erl | 90 +++++++++++++++++++++++++ 8 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 src/benchmarker.erl create mode 100644 src/partition.erl create mode 100644 src/requestWorker.erl create mode 100644 src/requester.erl create mode 100644 src/resources.erl create mode 100644 src/shuffle.erl create mode 100644 src/statistician.erl diff --git a/README.md b/README.md index 28f532c..1fee0d5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,50 @@ -# SPSBenchmark +SPSBenchmark +===== -Wikimirror benchmark for the SPS course. \ No newline at end of file +Benchmark tool for the Software Performance and Scalability course. + +Configuration +----- + +Edit: spsBenchmark.app.src + +Parameters: + - domain: The url without the resource. + - pageFile: file with all the resources (one per line). + - nPages: how many resources to use (the shuffling may take some time). + +Then + + $ make config + +Build +----- + + $ make + +Documentation +----- + + $ make doc + +Usage +----- +Start the console + + $ erl -pa ebin + +Initialize the benchmarker + + > benchmarker:init(). + +Service time + + > benchmarker:startServiceTime(Duration, Warmup). + +Open loop test + + > benchmarker:startOpenLoop(Rate, Duration, Warmup). + +Closed loop test + + > benchmarker:startClosedLoop(Rate, Population, Duration, Warmup). diff --git a/src/benchmarker.erl b/src/benchmarker.erl new file mode 100644 index 0000000..6db358a --- /dev/null +++ b/src/benchmarker.erl @@ -0,0 +1,107 @@ +-module(benchmarker). 
+-export([init/0, startServiceTime/2, startOpenLoop/3,
+         startClosedLoop/4, stop/0, inactive/0]).
+
+%% @spec init() -> true
+%% @doc Prepare the benchmark environment.
+%%      Starts the spsBenchmark application, inets and ssl, initializes the
+%%      statistician and spawns the process registered as `benchmarker',
+%%      which begins in inactive/0. Call it once: registering the name a
+%%      second time fails.
+init() ->
+    application:start(spsBenchmark),
+    inets:start(),
+    ssl:start(),
+    statistician:init(),
+    register(benchmarker, spawn(?MODULE, inactive, [])).
+
+%% @spec startServiceTime(Duration::integer(), Warmup::integer()) -> tuple()
+%% @doc Ask the benchmarker process to run the service time benchmark.
+%%      Both values are multiplied by 1000 before being sent; the
+%%      benchmarker process uses them as `receive ... after' timeouts.
+startServiceTime(Duration, Warmup) ->
+    Request = {start_benchmark, service_time, Duration * 1000, Warmup * 1000},
+    erlang:send(benchmarker, Request).
+
+%% @spec startOpenLoop(Rate::float(), Duration::integer(),
+%%                     Warmup::integer()) -> tuple()
+%% @doc Ask the benchmarker process to run the open loop benchmark.
+%%      Duration and Warmup are multiplied by 1000 before being sent.
+startOpenLoop(Rate, Duration, Warmup) ->
+    Request = {start_benchmark, open_loop, Rate,
+               Duration * 1000, Warmup * 1000},
+    erlang:send(benchmarker, Request).
+
+%% @spec startClosedLoop(Rate::float(), NumWorkers::integer(),
+%%                       Duration::integer(), Warmup::integer()) -> tuple()
+%% @doc Ask the benchmarker process to run the closed loop benchmark.
+%%      Duration and Warmup are multiplied by 1000 before being sent.
+startClosedLoop(Rate, NumWorkers, Duration, Warmup) ->
+    Request = {start_benchmark, closed_loop, Rate, NumWorkers,
+               Duration * 1000, Warmup * 1000},
+    erlang:send(benchmarker, Request).
+
+%% @spec stop() -> tuple()
+%% @doc Ask the benchmarker process to stop the running benchmark.
+stop() ->
+    erlang:send(benchmarker, {stop_benchmark}).
+
+%% @spec inactive() -> void()
+%% @doc The benchmark is inactive, wait for a message to start.
+inactive() ->
+    receive
+        {start_benchmark, service_time, Duration, Warmup} ->
+            run(fun requester:startServiceTime/1,
+                "Starting service time benchmark~n", Duration, Warmup);
+        {start_benchmark, open_loop, Rate, Duration, Warmup} ->
+            run(fun(Pid) -> requester:startOpenLoop(Pid, Rate) end,
+                "Starting open loop benchmark~n", Duration, Warmup);
+        {start_benchmark, closed_loop, Rate, NumWorkers, Duration, Warmup} ->
+            run(fun(Pid) -> requester:startClosedLoop(Pid, Rate, NumWorkers) end,
+                "Starting closed loop benchmark~n", Duration, Warmup);
+        {stop_benchmark} ->
+            % Nothing is running: drop the request so that it cannot stay
+            % queued and abort the next benchmark during its warmup.
+            inactive()
+    end.
+
+%% @spec run(Start::function(), Banner::string(), Duration::integer(),
+%%           Warmup::integer()) -> void()
+%% @doc Create and monitor a requester, start it through Start, print the
+%%      banner and enter the warmup phase.
+run(Start, Banner, Duration, Warmup) ->
+    RequesterPid = requester:init(),
+    erlang:monitor(process, RequesterPid),
+    Start(RequesterPid),
+    io:format(Banner),
+    warmup(RequesterPid, Duration, Warmup).
+
+%% @spec warmup(RequesterPid::pid(), Duration::integer(),
+%%              Warmup::integer()) -> void()
+%% @doc Start the warmup and start recording statistics at the end of it.
+%%      A stop request during the warmup stops the requester and waits for
+%%      it to terminate before the benchmarker becomes inactive again.
+warmup(RequesterPid, Duration, Warmup) ->
+    receive
+        {stop_benchmark} ->
+            RequesterPid ! {stop},
+            % Keep this process (and its registered name) alive: returning
+            % here would terminate the benchmarker.
+            deactivate()
+    after Warmup ->
+        statistician:startRecording(),
+        active(Duration, RequesterPid)
+    end.
+
+%% @spec active(Duration::integer(), RequesterPid::pid()) -> void()
+%% @doc The benchmark is active, print the statistics
+%%      when the benchmark ends or the requester terminates early.
+%%      A stop request ends the recording without printing.
+active(Duration, RequesterPid) ->
+    receive
+        {'DOWN', _MonitorReference, process, _Pid, _Reason} ->
+            io:format("Requester ended before benchmarker finished~n"),
+            statistician:printStatistics(),
+            statistician:stopRecording(),
+            inactive();
+        {stop_benchmark} ->
+            RequesterPid ! {stop},
+            % The statistician only resets its counters when it receives
+            % start_recording while inactive, so recording must be stopped.
+            statistician:stopRecording(),
+            deactivate()
+    after Duration ->
+        requester:printActiveWorkers(RequesterPid),
+        requester:stop(RequesterPid),
+        statistician:printStatistics(),
+        statistician:stopRecording(),
+        deactivate()
+    end.
+
+%% @spec deactivate() -> void()
+%% @doc The benchmark waits the requester to finish.
+%% This makes sure that there won't be any active requests
+%% before starting a new benchmark.
+deactivate() ->
+    receive
+        {'DOWN', _MonitorReference, process, _Pid, _Reason} ->
+            % io:format("Benchmarker is ready~n"),
+            inactive()
+    end.
diff --git a/src/partition.erl b/src/partition.erl
new file mode 100644
index 0000000..8d99cd4
--- /dev/null
+++ b/src/partition.erl
@@ -0,0 +1,16 @@
+-module(partition).
+-export([list/2]).
+
+list(List, N) ->
+    RevList = split_list(List, N),
+    lists:foldl(fun(E, Acc) -> [lists:reverse(E)|Acc] end, [], RevList).
+
+split_list(List, Max) ->
+    element(1, lists:foldl(
+        fun (E, {[Buff|Acc], C}) when C < Max ->
+                {[[E|Buff]|Acc], C+1};
+            (E, {[Buff|Acc], _}) ->
+                {[[E],Buff|Acc], 1};
+            (E, {[], _}) ->
+                {[[E]], 1}
+        end, {[], 0}, List)).
diff --git a/src/requestWorker.erl b/src/requestWorker.erl
new file mode 100644
index 0000000..a298c17
--- /dev/null
+++ b/src/requestWorker.erl
@@ -0,0 +1,37 @@
+-module(requestWorker).
+-export([request/1, loop_request/2]).
+
+%% @spec request(Url::string()) -> void()
+%% @doc Make a request to the given url and record the response time.
+%%      The outcome is sent to the statistician process as
+%%      {ok | error | connection_error, Begin, End}, with both timestamps
+%%      in nanoseconds. A 404 response is only printed and not reported.
+request(Url) ->
+    % Monotonic time is immune to wall-clock adjustments, and the explicit
+    % unit matches the statistician, which divides the difference by
+    % 1000000 to obtain milliseconds.
+    Begin = erlang:monotonic_time(nanosecond),
+    Response = httpc:request(Url),
+    End = erlang:monotonic_time(nanosecond),
+    case Response of
+        {ok, {{_Version, 200, _ReasonPhrase}, _Headers, _Body}} ->
+            statistician ! {ok, Begin, End};
+        {ok, {{_Version, 404, _ReasonPhrase}, _Headers, _Body}} ->
+            io:format("Error 404: ~p~n", [Url]);
+        {ok, {{_Version, Result, _ReasonPhrase}, _Headers, _Body}} ->
+            io:format("Error: ~p~nUrl: ~p~n", [Result, Url]),
+            statistician ! {error, Begin, End};
+        {error, Reason} ->
+            io:format("Error: ~p~n", [Reason]),
+            statistician ! {connection_error, Begin, End}
+    end.
+
+%% @spec loop_request(Urls::[string()], Rate::float()) -> void()
+%% @doc Make a request, wait using the Rate and then proceed with the next
+%% request.
+loop_request([], _Rate) ->
+    ok;
+loop_request([Url | RemainingUrls], Rate) ->
+    request(Url),
+    % A stop message ends the loop; otherwise the next request starts
+    % after the sampled delay.
+    receive
+        {stop} -> ok
+    after requester:exponential_time(Rate) ->
+        loop_request(RemainingUrls, Rate)
+    end.
diff --git a/src/requester.erl b/src/requester.erl
new file mode 100644
index 0000000..4de6bd5
--- /dev/null
+++ b/src/requester.erl
@@ -0,0 +1,153 @@
+-module(requester).
+-export([init/0, startServiceTime/1, startOpenLoop/2, startClosedLoop/3,
+         waiting/1, stop/1, printActiveWorkers/1, exponential_time/1]).
+
+%% @spec init() -> pid()
+%% @doc Build the list of urls and spawn the requester process.
+%%      The page file, domain and number of pages come from the
+%%      spsBenchmark application environment (keys pageFile, domain and
+%%      nPages). The call returns only after the pages have been read
+%%      and shuffled.
+init() ->
+    Setting = fun(Key, Default) ->
+                  application:get_env(spsBenchmark, Key, Default)
+              end,
+    Pages = resources:n_pages(Setting(pageFile, "pages.csv"), 0,
+                              Setting(nPages, 1000)),
+    Links = resources:urls(Setting(domain, "https://en.wikipedia.org/wiki/"),
+                           shuffle:list(Pages)),
+    spawn(?MODULE, waiting, [Links]).
+
+%% @spec waiting(Links::[string()]) -> void()
+%% @doc Hold the links until a message selects the kind of benchmark.
+waiting(Links) ->
+    receive
+        {service_time} ->
+            service_time(Links);
+        {open_loop, Rate} ->
+            open_loop(Rate, Links, 0);
+        {closed_loop, Rate, NumWorkers} ->
+            closed_loop(Rate, NumWorkers, Links)
+    end.
+
+%% @spec startServiceTime(Pid::pid()) -> tuple()
+%% @doc Tell the requester to issue the service time requests.
+startServiceTime(Pid) ->
+    erlang:send(Pid, {service_time}).
+
+%% @spec startOpenLoop(Pid::pid(), Rate::float()) -> tuple()
+%% @doc Tell the requester to issue the open loop requests.
+startOpenLoop(Pid, Rate) ->
+    erlang:send(Pid, {open_loop, Rate}).
+
+%% @spec startClosedLoop(Pid::pid(), Rate::float(),
+%%                       NumWorkers::integer()) -> tuple()
+%% @doc Tell the requester to issue the closed loop requests.
+startClosedLoop(Pid, Rate, NumWorkers) ->
+    erlang:send(Pid, {closed_loop, Rate, NumWorkers}).
+
+%% @spec stop(Pid::pid()) -> void()
+%% @doc Stop the benchmark.
+stop(Pid) ->
+    erlang:send(Pid, {stop}).
+
+%% @spec printActiveWorkers(Pid::pid()) -> tuple()
+%% @doc Ask the requester with the given pid to print how many of its
+%%      workers are active.
+printActiveWorkers(Pid) ->
+    erlang:send(Pid, {print_active_workers}).
+
+%% @spec service_time(Links::[string()]) -> void()
+%% @doc Issue one request for the first link, then wait for it to finish.
+%%      Ends when no links are left.
+service_time([]) ->
+    ok;
+service_time([Link | RemainingLinks]) ->
+    generate_worker(Link),
+    service_time_wait(RemainingLinks).
+
+%% @spec service_time_wait(Links::[string()]) -> void()
+%% @doc Wait for the outstanding request to end before issuing the next.
+service_time_wait(Links) ->
+    receive
+        {'DOWN', _Ref, process, _Pid, _Reason} ->
+            service_time(Links);
+        {print_active_workers} ->
+            service_time_wait(Links);
+        {stop} ->
+            ok
+    end.
+
+%% @spec open_loop(Rate::float(), Links::[string()],
+%%                 ActiveWorkers::integer()) -> ok
+%% @doc Issue requests in open loop, separated by exponentially
+%%      distributed delays, while counting the workers still running.
+open_loop(Rate, Links, ActiveWorkers) ->
+    receive
+        {'DOWN', _Ref, process, _Pid, _Reason} ->
+            open_loop(Rate, Links, ActiveWorkers - 1);
+        {print_active_workers} ->
+            io:format("Num active requests: ~p~n", [ActiveWorkers]),
+            open_loop(Rate, Links, ActiveWorkers);
+        {stop} ->
+            ok
+    after exponential_time(Rate) ->
+        open_loop_next(Rate, Links, ActiveWorkers)
+    end.
+
+%% @spec open_loop_next(Rate::float(), Links::[string()],
+%%                      ActiveWorkers::integer()) -> ok
+%% @doc Act on an elapsed delay: finish when there are neither links nor
+%%      workers left, keep waiting while workers are still running,
+%%      otherwise start a worker for the next link.
+open_loop_next(_Rate, [], 0) ->
+    ok;
+open_loop_next(Rate, [], ActiveWorkers) ->
+    open_loop(Rate, [], ActiveWorkers);
+open_loop_next(Rate, [Link | RemainingLinks], ActiveWorkers) ->
+    generate_worker(Link),
+    open_loop(Rate, RemainingLinks, ActiveWorkers + 1).
+
+%% @spec closed_loop(Rate::float(), NumWorkers::integer(),
+%%                   Links::[string()]) -> ok
+%% @doc Split the links into chunks of length(Links) div NumWorkers and
+%%      hand them to the closed loop workers.
+closed_loop(Rate, NumWorkers, Links) ->
+    LinksPerWorker = length(Links) div NumWorkers,
+    LinksLists = partition:list(Links, LinksPerWorker),
+    closed_loop_aux(Rate, NumWorkers, LinksLists).
+
+%% @spec closed_loop_aux(Rate::float(), NumWorkers::integer(),
+%%                       LinksLists::[[string()]]) -> ok
+%% @doc Auxiliary function for closed_loop that takes the Links already
+%%      partitioned for each worker. One worker is started per partition
+%%      until NumWorkers workers exist or the partitions run out, which
+%%      happens when there are fewer links than workers.
+closed_loop_aux(_Rate, 0, _LinksLists) ->
+    closed_loop_wait();
+closed_loop_aux(_Rate, _NumWorkers, []) ->
+    % Fewer partitions than requested workers: run with what is there
+    % instead of crashing on an empty list.
+    closed_loop_wait();
+closed_loop_aux(Rate, NumWorkers, [WorkersLinks | OtherLinks]) ->
+    generate_loop_worker(WorkersLinks, Rate),
+    closed_loop_aux(Rate, NumWorkers - 1, OtherLinks).
+
+%% @spec closed_loop_wait() -> void()
+%% @doc Wait until the closed loop is stopped.
+%%      Exits with reason shutdown on stop; the workers are linked to
+%%      this process. Requests to print the active workers are dropped.
+closed_loop_wait() ->
+    receive
+        {print_active_workers} -> closed_loop_wait();
+        {stop} -> exit(shutdown)
+    end.
+
+%% @spec exponential_time(Rate::float()) -> integer()
+%% @doc Get the number of milliseconds to wait, sampling
+%%      an exponential variable with specific rate.
+exponential_time(Rate) ->
+    round(exponential(Rate) * 1000).
+
+%% @spec exponential(Rate::float()) -> float()
+%% @doc Get the sample of an exponential variable with specific rate.
+exponential(Rate) ->
+    % rand:uniform/0 may return 0.0, for which math:log/1 fails;
+    % 1.0 - X never reaches 0.0 and is equally uniform.
+    -math:log(1.0 - rand:uniform()) / Rate.
+
+%% @spec generate_worker(Url::string()) -> {pid(), reference()}
+%% @doc Spawn and monitor a worker for a given url.
+generate_worker(Url) ->
+    spawn_monitor(
+        requestWorker,
+        request,
+        [Url]
+    ).
+
+%% @spec generate_loop_worker(Urls::[string()], Rate::float()) -> pid()
+%% @doc Spawn and link to a worker that makes requests in a loop
+%%      using the given urls.
+generate_loop_worker(Urls, Rate) ->
+    spawn_link(
+        requestWorker,
+        loop_request,
+        [Urls, Rate]
+    ).
diff --git a/src/resources.erl b/src/resources.erl
new file mode 100644
index 0000000..86d5625
--- /dev/null
+++ b/src/resources.erl
@@ -0,0 +1,43 @@
+-module(resources).
+-export([n_pages/3, urls/2]).
+
+%% @spec n_pages(FileName::string(), FromLine::integer(),
+%%               ToLine::integer()) -> [string()]
+%% @doc Read the file specifying the first and last line indexes.
+n_pages(FileName, FromLine, ToLine) ->
+    {ok, FileDev} = file:open(FileName, [raw, read, read_ahead]),
+    % The file is closed even if reading fails.
+    try
+        read_lines(FileDev, FromLine, ToLine, [])
+    after
+        file:close(FileDev)
+    end.
+
+%% @spec read_lines(FileDev::IoDevice, FromLine::integer(),
+%%                  ToLine::integer(), Acc::[string()]) -> [string()]
+%% @doc Read the file specifying the first and last line indexes.
+%%      Lines before FromLine are skipped. The pages are accumulated by
+%%      prepending, so they come back in reverse file order.
+%%      The file is not closed by this function.
+read_lines(_FileDev, 0, 0, Acc) ->
+    Acc;
+read_lines(FileDev, 0, ToLine, Acc) ->
+    case file:read_line(FileDev) of
+        eof -> Acc;
+        {ok, Line} ->
+            Page = string:strip(Line, right, $\n),
+            read_lines(FileDev, 0, ToLine - 1, [Page | Acc])
+    end;
+read_lines(FileDev, FromLine, ToLine, Acc) ->
+    case file:read_line(FileDev) of
+        eof -> Acc;
+        {ok, _} -> read_lines(FileDev, FromLine - 1, ToLine - 1, Acc)
+    end.
+
+%% @spec urls(Domain::string(), Pages::[string()]) -> [string()]
+%% @doc Transform the given pages to urls prefixing the specified domain.
+%%      Pages rejected by valid_page/1 are left out.
+urls(Domain, Pages) ->
+    [Domain ++ Page || Page <- Pages, valid_page(Page)].
+
+%% @spec valid_page(Page::string()) -> boolean()
+%% @doc Check if a page is valid, i.e. it does not end in ".jpg" or ".png".
+valid_page(Page) ->
+    not (lists:suffix(".jpg", Page) orelse lists:suffix(".png", Page)).
diff --git a/src/shuffle.erl b/src/shuffle.erl
new file mode 100644
index 0000000..7d41fc7
--- /dev/null
+++ b/src/shuffle.erl
@@ -0,0 +1,17 @@
+-module(shuffle).
+-export([list/1]).
+
+%% @spec list(List::[term()]) -> [term()]
+%% @doc Return the elements of List in random order.
+%%      Every element is tagged with a random float and the tagged list
+%%      is sorted, which costs O(n log n) instead of repeatedly removing
+%%      the n-th element of the remaining list.
+list(List) ->
+    Tagged = [{rand:uniform(), Elem} || Elem <- List],
+    [Elem || {_Tag, Elem} <- lists:sort(Tagged)].
diff --git a/src/statistician.erl b/src/statistician.erl
new file mode 100644
index 0000000..7753332
--- /dev/null
+++ b/src/statistician.erl
@@ -0,0 +1,90 @@
+-module(statistician).
+-export([init/0, startRecording/0, inactive/0, stopRecording/0, printStatistics/0]).
+
+-record(stats, {requestsCount = 0,
+                errorsCount = 0,
+                responseTimesAvg = 0,
+                responseTimesSquareAvg = 0
+               }).
+
+%% @spec init() -> true
+%% @doc Spawn the statistician process, which starts in inactive/0,
+%%      and register it under the name "statistician".
+init() ->
+    register(statistician, spawn(?MODULE, inactive, [])).
+
+%% @spec stopRecording() -> tuple()
+%% @doc Tell the statistician to stop recording.
+stopRecording() ->
+    erlang:send(statistician, {stop_recording}).
+
+%% @spec startRecording() -> tuple()
+%% @doc Tell the statistician to start recording.
+startRecording() ->
+    erlang:send(statistician, {start_recording}).
+
+%% @spec inactive() -> void()
+%% @doc Idle until asked to record, starting then from empty statistics.
+%%      Any other message received meanwhile is thrown away.
+inactive() ->
+    receive
+        Message -> idle(Message)
+    end.
+
+%% @spec idle(Message::term()) -> void()
+%% @doc Handle one message received while inactive.
+idle({start_recording}) ->
+    recording(#stats{});
+idle(_Discarded) ->
+    inactive().
+
+%% @spec recording(S::#stats{}) -> ok
+%% @doc Calculate the statistics of the received response times.
+recording(S = #stats{requestsCount = RequestsCount,
+                     errorsCount = ErrorsCount,
+                     responseTimesAvg = ResponseTimesAvg,
+                     responseTimesSquareAvg = ResponseTimesSquareAvg}) ->
+    receive
+        {ok, Begin, End} ->
+            % The difference is divided by 1000000 and printed as
+            % milliseconds, i.e. the timestamps are taken as nanoseconds.
+            NewResponseTime = (End - Begin) / 1000000,
+            recording(S#stats{requestsCount = RequestsCount + 1,
+                              responseTimesAvg =
+                                  average(RequestsCount, ResponseTimesAvg, NewResponseTime),
+                              responseTimesSquareAvg =
+                                  average(RequestsCount, ResponseTimesSquareAvg, math:pow(NewResponseTime, 2))
+                             });
+        {error, _Begin, _End} ->
+            % Increment the counter; "ErrorsCount = 1" was a match that
+            % failed with badmatch as soon as the first error arrived.
+            recording(S#stats{errorsCount = ErrorsCount + 1});
+        {connection_error, _Begin, _End} ->
+            recording(S#stats{errorsCount = ErrorsCount + 1});
+        {printStats} ->
+            Variance = variance(S),
+            io:format(
+                "Num requests: ~p~nAverage response time: ~pms~nVariance: ~p~nDeviation: ~p~nErrors: ~p~n",
+                [RequestsCount, ResponseTimesAvg, Variance, math:sqrt(Variance), ErrorsCount]
+            ),
+            recording(S);
+        {stop_recording} ->
+            inactive()
+    end.
+
+%% @spec printStatistics() -> tuple()
+%% @doc Print the statistics of the requests completed so far.
+printStatistics() ->
+    statistician ! {printStats}.
+
+%% @spec average(OldCount::integer(), OldAvg::float(),
+%%               NewValue::float()) -> float()
+%% @doc Calculate the average given the old count, the old average and
+%%      the new value.
+average(0, _OldAvg, NewValue) ->
+    NewValue;
+average(OldCount, OldAvg, NewValue) ->
+    OldAvg * (OldCount / (OldCount + 1)) + NewValue / (OldCount + 1).
+
+%% @spec variance(#stats{}) -> number()
+%% @doc Calculate the variance of the response time.
+%%      If the number of requests is zero then it returns zero.
+variance(#stats{requestsCount = 0}) ->
+    0;
+variance(#stats{responseTimesAvg = ResponseTimesAvg,
+                responseTimesSquareAvg = ResponseTimesSquareAvg}) ->
+    % Rounding can push the difference slightly below zero, which would
+    % make math:sqrt/1 fail when the deviation is printed.
+    max(0.0, ResponseTimesSquareAvg - math:pow(ResponseTimesAvg, 2)).