Simple Runtime Analyzer
Loading...
Searching...
No Matches
sample_utilities.hpp
1
13#pragma once
14
#include <cmath>
#include <concepts>
#include <cstddef>
#include <filesystem>
#include <format>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ostream>
#include <ranges>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
30
31namespace sra
32{
33
34namespace detail
35{
36
37// Concept to check if a type can be inserted into an ostream
38template <typename T>
39concept StreamInsertable = requires(std::ostream& os, const T& t) {
40 { os << t } -> std::same_as<std::ostream&>;
41};
42
43// Concept for a function that can fill a vector of type T
44template <typename F, typename T>
45concept FillerFunction = std::invocable<F, std::vector<T>&, size_t>;
46
47// Concept for a function that serializes a sample
48template <typename S, typename Sample>
50 std::invocable<S, const Sample&> && std::is_convertible_v<std::invoke_result_t<S, const Sample&>, std::string>;
51
/// @brief Escapes a string so it can be embedded between double quotes in a JSON document.
///
/// Quotes and backslashes are backslash-escaped, the control characters that have
/// a short JSON form (\b, \f, \n, \r, \t) use it, and every other byte below 0x20
/// is written as a lowercase `\u00XX` escape. All remaining bytes are copied unchanged.
///
/// @param s Text to escape.
/// @return The escaped text, without surrounding quotes.
inline std::string escape_json(const std::string& s) noexcept
{
    constexpr char hex_digits[] = "0123456789abcdef";

    std::string escaped;
    escaped.reserve(s.size()); // lower bound; grows only when something needs escaping

    for (const char c : s)
    {
        switch (c)
        {
        case '"': escaped += "\\\""; break;
        case '\\': escaped += "\\\\"; break;
        case '\b': escaped += "\\b"; break;
        case '\f': escaped += "\\f"; break;
        case '\n': escaped += "\\n"; break;
        case '\r': escaped += "\\r"; break;
        case '\t': escaped += "\\t"; break;
        default:
        {
            // Compare as unsigned so the check does not depend on whether plain char is signed.
            const auto code = static_cast<unsigned char>(c);
            if (code < 0x20)
            {
                escaped += "\\u00";
                escaped += hex_digits[code >> 4];
                escaped += hex_digits[code & 0x0f];
            }
            else { escaped += c; }
        }
        }
    }
    return escaped;
}
76
77} // namespace detail
78
/// @brief Tuning parameters for generate_sizes().
struct SampleSizeConfig
{
    /// Granularity of the generated sizes: candidates are rounded to a multiple of
    /// this value, and it is also the smallest size the generator starts from.
    size_t round_to = 100;

    /// Exponent applied to the normalised position (0..1) of each candidate before
    /// it is mapped onto the logarithmic size scale; 1.0 leaves the spacing unchanged.
    double bias = 1.0;
};
84
/// @brief Rounds @p value to the nearest multiple of @p multiple, ties rounding up.
///
/// @param value    Number to round.
/// @param multiple Rounding step; 0 means "no rounding" and returns @p value unchanged.
/// @return The nearest multiple. If rounding up would exceed the range of `size_t`,
///         the largest representable multiple not above @p value is returned instead.
[[nodiscard]] inline size_t round_to(size_t value, size_t multiple) noexcept
{
    if (multiple == 0) { return value; }

    // Work from the remainder instead of `value + multiple / 2`, which wraps around
    // for values close to the maximum of size_t.
    const size_t remainder = value % multiple;
    const size_t lower = value - remainder;

    // Same threshold as the classic formula: round up once remainder >= ceil(multiple / 2).
    if (remainder < multiple - multiple / 2) { return lower; }

    const size_t upper = lower + multiple;
    return upper < lower ? lower : upper; // wrapped: stay on the representable side
}
90
91[[nodiscard]] inline std::vector<size_t>
92generate_sizes(size_t sample_count, size_t max_sample_size, const SampleSizeConfig& config = {})
93{
94 if (sample_count == 0 || max_sample_size < config.round_to) { return {}; }
95 if (sample_count == 1) { return {max_sample_size}; }
96
97 const double log_min = std::log10(static_cast<double>(config.round_to));
98 const double log_max = std::log10(static_cast<double>(max_sample_size));
99
100 std::set<size_t> seen;
101
102 // Oversample with bias
103 const size_t oversample = sample_count * 3;
104 for (size_t i = 0; i < oversample; ++i)
105 {
106 const double t = std::pow(static_cast<double>(i) / (oversample - 1), config.bias);
107 const double log_size = log_min + t * (log_max - log_min);
108 const size_t raw_size = static_cast<size_t>(std::round(std::pow(10.0, log_size)));
109 const size_t rounded = round_to(raw_size, config.round_to);
110
111 if (rounded <= max_sample_size) { seen.insert(rounded); }
112 }
113
114 // Ensure max_sample_size is included
115 if (max_sample_size % config.round_to == 0) { seen.insert(max_sample_size); }
116
117 std::vector<size_t> all_sizes(seen.begin(), seen.end());
118
119 // Fill in gaps if needed
120 while (all_sizes.size() < sample_count)
121 {
122 const size_t last = all_sizes.back();
123 const size_t next = last + config.round_to;
124 if (next <= max_sample_size) { all_sizes.push_back(next); }
125 else { break; }
126 }
127
128 // Uniform downsampling
129 std::vector<size_t> final_sizes;
130 if (sample_count > 0 && !all_sizes.empty())
131 {
132 final_sizes.reserve(sample_count);
133 const size_t available = all_sizes.size();
134
135 if (sample_count == 1) { final_sizes.push_back(all_sizes.back()); }
136 else
137 {
138 for (size_t i = 0; i < sample_count; ++i)
139 {
140 const double t = static_cast<double>(i) / (sample_count - 1);
141 const size_t index = static_cast<size_t>(std::round(t * (available - 1)));
142 final_sizes.push_back(all_sizes[index]);
143 }
144 }
145 }
146
147 // Ensure last value is max_sample_size
148 if (!final_sizes.empty() && max_sample_size % config.round_to == 0) { final_sizes.back() = max_sample_size; }
149
150 return final_sizes;
151}
152
153[[nodiscard]] inline std::vector<size_t> generate_sizes(size_t sample_count, size_t max_sample_size)
154{
155 return generate_sizes(sample_count, max_sample_size, SampleSizeConfig{});
156}
157
158template <typename T, detail::FillerFunction<T> F>
159[[nodiscard]] std::vector<std::vector<T>> generate_samples(F&& filler, const std::vector<size_t>& sizes)
160{
161 std::vector<std::vector<T>> result;
162 result.reserve(sizes.size());
163
164 for (size_t size : sizes)
165 {
166 std::vector<T> sample;
167 filler(sample, size);
168 result.push_back(std::move(sample));
169 }
170
171 return result;
172}
173
/// @brief Renders a range as `[a, b, c]`; an empty range becomes `[]`.
///
/// Elements are formatted with `std::format("{}", element)` when a formatter for
/// the element type is available. Types that only provide `operator<<` — which is
/// all the constraint on this function asks for — are written through a string
/// stream instead, so every type accepted by the constraint also compiles.
///
/// @param container Range whose elements are serialised in iteration order.
/// @return The bracketed, comma-and-space separated representation.
template <std::ranges::range Iterable>
requires detail::StreamInsertable<std::ranges::range_value_t<Iterable>>
[[nodiscard]] std::string serialize_iterable(const Iterable& container)
{
    using Value = std::ranges::range_value_t<Iterable>;

    // A std::formatter specialisation is default-constructible only when it is enabled.
    constexpr bool has_formatter = std::is_default_constructible_v<std::formatter<Value, char>>;

    std::string result = "[";
    bool first = true;
    for (const auto& element : container)
    {
        if (!first) { result += ", "; }
        first = false;

        if constexpr (has_formatter) { result += std::format("{}", element); }
        else
        {
            std::ostringstream os;
            os << element;
            result += os.str();
        }
    }
    result += "]";

    return result;
}
188
189template <std::ranges::range Container, detail::SampleSerializer<std::ranges::range_value_t<Container>> Serializer>
190void save_samples(const Container& samples, Serializer&& serializer, const std::filesystem::path& filename)
191{
192 std::ofstream file(filename);
193 if (!file.is_open()) { throw std::runtime_error("Error: Could not open file " + filename.string()); }
194
195 const auto ext = filename.extension();
196
197 if (ext == ".txt")
198 {
199 for (const auto& sample : samples) { file << serializer(sample) << "\n"; }
200 }
201 else if (ext == ".csv")
202 {
203 file << "sample_id,sample_data\n";
204 size_t id = 1;
205 for (const auto& sample : samples) { file << id++ << "," << serializer(sample) << "\n"; }
206 }
207 else if (ext == ".json")
208 {
209 file << "[\n";
210 bool first = true;
211 for (const auto& sample : samples)
212 {
213 if (!first) { file << ",\n"; }
214 file << " \"" << detail::escape_json(serializer(sample)) << "\"";
215 first = false;
216 }
217 file << "\n]\n";
218 }
219 else { throw std::runtime_error("Error: Unsupported file extension " + ext.string()); }
220}
221
222template <std::ranges::range Container>
223requires detail::StreamInsertable<std::ranges::range_value_t<Container>>
224void save_samples(const Container& samples, const std::filesystem::path& filename)
225{
227}
228
229} // namespace sra
Definition sample_utilities.hpp:45
Definition sample_utilities.hpp:49
Definition sample_utilities.hpp:39
std::string escape_json(const std::string &s) noexcept
Definition sample_utilities.hpp:52
Definition runtime_analyzer.hpp:28
size_t round_to(size_t value, size_t multiple) noexcept
Definition sample_utilities.hpp:85
std::string serialize_iterable(const Iterable &container)
Definition sample_utilities.hpp:176
std::vector< std::vector< T > > generate_samples(F &&filler, const std::vector< size_t > &sizes)
Definition sample_utilities.hpp:159
std::vector< size_t > generate_sizes(size_t sample_count, size_t max_sample_size, const SampleSizeConfig &config={})
Definition sample_utilities.hpp:92
void save_samples(const Container &samples, Serializer &&serializer, const std::filesystem::path &filename)
Definition sample_utilities.hpp:190
Definition sample_utilities.hpp:80
size_t round_to
Definition sample_utilities.hpp:81
double bias
Definition sample_utilities.hpp:82