Simple Runtime Analyzer
Loading...
Searching...
No Matches
sample_utilities.hpp
1#pragma once
2
3#include <cmath>
4#include <cstddef>
5#include <filesystem>
6#include <format>
7#include <fstream>
8#include <iomanip>
9#include <ostream>
10#include <ranges>
11#include <set>
12#include <sstream>
13#include <stdexcept>
14#include <string>
15#include <type_traits>
16#include <utility>
17#include <vector>
18
19namespace sra
20{
21
22namespace detail
23{
24
25// Concept to check if a type can be inserted into an ostream
26template <typename T>
27concept StreamInsertable = requires(std::ostream& os, const T& t) {
28 { os << t } -> std::same_as<std::ostream&>;
29};
30
31// Concept for a function that can fill a vector of type T
32template <typename F, typename T>
33concept FillerFunction = std::invocable<F, std::vector<T>&, size_t>;
34
35// Concept for a function that serializes a sample
36template <typename S, typename Sample>
38 std::invocable<S, const Sample&> && std::is_convertible_v<std::invoke_result_t<S, const Sample&>, std::string>;
39
/// @brief Escapes a string so it can be placed between the quotes of a JSON string literal.
///
/// `"` and `\` are backslash-escaped, the control characters with short JSON
/// escapes (`\b`, `\f`, `\n`, `\r`, `\t`) use them, and every other character in
/// the range 0x00-0x1F is written as a lowercase `\u00XX` escape. All remaining
/// bytes (including bytes >= 0x80, e.g. UTF-8 sequences) are copied unchanged.
///
/// @param s Text to escape.
/// @return The escaped text, without surrounding quotes.
///
/// @note The function is `noexcept` but allocates; an allocation failure
///       therefore terminates the program instead of propagating.
inline std::string escape_json(const std::string& s) noexcept
{
    static constexpr char hex_digits[] = "0123456789abcdef";

    // Appending to a string directly avoids the stream machinery and the
    // sticky hex/fill formatting state an ostringstream would carry around.
    std::string escaped;
    escaped.reserve(s.size());

    for (const char c : s)
    {
        switch (c)
        {
        case '"': escaped += "\\\""; break;
        case '\\': escaped += "\\\\"; break;
        case '\b': escaped += "\\b"; break;
        case '\f': escaped += "\\f"; break;
        case '\n': escaped += "\\n"; break;
        case '\r': escaped += "\\r"; break;
        case '\t': escaped += "\\t"; break;
        default:
        {
            // Going through unsigned char keeps bytes >= 0x80 out of the
            // control-character range regardless of the signedness of char.
            const auto code = static_cast<unsigned char>(c);
            if (code <= 0x1f)
            {
                escaped += "\\u00";
                escaped += hex_digits[code >> 4];
                escaped += hex_digits[code & 0x0f];
            }
            else { escaped += c; }
        }
        }
    }
    return escaped;
}
64
65} // namespace detail
66
/// Tuning parameters for generate_sizes().
struct SampleSizeConfig
{
    /// Granularity of the generated sizes: every candidate is rounded to a
    /// multiple of this value, and it is also the smallest size considered.
    size_t round_to = 100;

    /// Exponent applied to the normalized position (0..1) of each candidate
    /// before it is mapped onto the log10 scale between `round_to` and the
    /// maximum size. 1.0 spaces candidates evenly on that scale; larger values
    /// move candidates towards the small end.
    double bias = 1.0;
};
72
/// @brief Rounds `value` to the nearest multiple of `multiple` (ties round up).
///
/// @param value    Number to round.
/// @param multiple Rounding granularity; 0 means "no rounding" and returns `value`.
/// @return The nearest multiple of `multiple`. When rounding up would exceed the
///         range of `size_t`, the largest representable multiple (the one at or
///         below `value`) is returned instead of a wrapped-around result.
inline size_t round_to(size_t value, size_t multiple) noexcept
{
    if (multiple == 0) { return value; }

    // Work from the remainder instead of computing `value + multiple / 2`,
    // which wraps around for values close to the maximum of size_t.
    const size_t remainder = value % multiple;
    const size_t lower = value - remainder;

    // Same tie-breaking as (value + multiple / 2) / multiple: round up once
    // the remainder reaches the upper half of the interval.
    if (remainder < multiple - multiple / 2) { return lower; }

    const size_t upper = lower + multiple;
    // Unsigned wrap-around means the next multiple is not representable.
    return upper < lower ? lower : upper;
}
78
79[[nodiscard]] inline std::vector<size_t>
80generate_sizes(size_t sample_count, size_t max_sample_size, const SampleSizeConfig& config = {})
81{
82 if (sample_count == 0 || max_sample_size < config.round_to) { return {}; }
83 if (sample_count == 1) { return {max_sample_size}; }
84
85 const double log_min = std::log10(static_cast<double>(config.round_to));
86 const double log_max = std::log10(static_cast<double>(max_sample_size));
87
88 std::set<size_t> seen;
89
90 // Oversample with bias
91 const size_t oversample = sample_count * 3;
92 for (size_t i = 0; i < oversample; ++i)
93 {
94 const double t = std::pow(static_cast<double>(i) / (oversample - 1), config.bias);
95 const double log_size = log_min + t * (log_max - log_min);
96 const size_t raw_size = static_cast<size_t>(std::round(std::pow(10.0, log_size)));
97 const size_t rounded = round_to(raw_size, config.round_to);
98
99 if (rounded <= max_sample_size) { seen.insert(rounded); }
100 }
101
102 // Ensure max_sample_size is included
103 if (max_sample_size % config.round_to == 0) { seen.insert(max_sample_size); }
104
105 std::vector<size_t> all_sizes(seen.begin(), seen.end());
106
107 // Fill in gaps if needed
108 while (all_sizes.size() < sample_count)
109 {
110 const size_t last = all_sizes.back();
111 const size_t next = last + config.round_to;
112 if (next <= max_sample_size) { all_sizes.push_back(next); }
113 else { break; }
114 }
115
116 // Uniform downsampling
117 std::vector<size_t> final_sizes;
118 if (sample_count > 0 && !all_sizes.empty())
119 {
120 final_sizes.reserve(sample_count);
121 const size_t available = all_sizes.size();
122
123 if (sample_count == 1) { final_sizes.push_back(all_sizes.back()); }
124 else
125 {
126 for (size_t i = 0; i < sample_count; ++i)
127 {
128 const double t = static_cast<double>(i) / (sample_count - 1);
129 const size_t index = static_cast<size_t>(std::round(t * (available - 1)));
130 final_sizes.push_back(all_sizes[index]);
131 }
132 }
133 }
134
135 // Ensure last value is max_sample_size
136 if (!final_sizes.empty() && max_sample_size % config.round_to == 0) { final_sizes.back() = max_sample_size; }
137
138 return final_sizes;
139}
140
141[[nodiscard]] inline std::vector<size_t> generate_sizes(size_t sample_count, size_t max_sample_size)
142{
143 return generate_sizes(sample_count, max_sample_size, SampleSizeConfig{});
144}
145
146template <typename T, detail::FillerFunction<T> F>
147[[nodiscard]] std::vector<std::vector<T>> generate_samples(F&& filler, const std::vector<size_t>& sizes)
148{
149 std::vector<std::vector<T>> result;
150 result.reserve(sizes.size());
151
152 for (size_t size : sizes)
153 {
154 std::vector<T> sample;
155 filler(sample, size);
156 result.push_back(std::move(sample));
157 }
158
159 return result;
160}
161
/// @brief Renders a range as `[a, b, c]`.
///
/// Elements are formatted with `std::format("{}", element)` and separated by
/// `", "`. An empty range yields `"[]"`.
///
/// @param container Range whose elements are rendered in iteration order.
/// @return The bracketed, comma-separated text.
template <std::ranges::range Iterable>
requires detail::StreamInsertable<std::ranges::range_value_t<Iterable>>
[[nodiscard]] std::string serialize_iterable(const Iterable& container)
{
    if (std::ranges::empty(container)) { return "[]"; }

    std::string text{"["};
    bool needs_separator = false;
    for (auto&& element : container)
    {
        if (needs_separator) { text += ", "; }
        text += std::format("{}", element);
        needs_separator = true;
    }
    text += "]";

    return text;
}
176
177template <std::ranges::range Container, detail::SampleSerializer<std::ranges::range_value_t<Container>> Serializer>
178void save_samples(const Container& samples, Serializer&& serializer, const std::filesystem::path& filename)
179{
180 std::ofstream file(filename);
181 if (!file.is_open()) { throw std::runtime_error("Error: Could not open file " + filename.string()); }
182
183 const auto ext = filename.extension();
184
185 if (ext == ".txt")
186 {
187 for (const auto& sample : samples) { file << serializer(sample) << "\n"; }
188 }
189 else if (ext == ".csv")
190 {
191 file << "sample_id,sample_data\n";
192 size_t id = 1;
193 for (const auto& sample : samples) { file << id++ << "," << serializer(sample) << "\n"; }
194 }
195 else if (ext == ".json")
196 {
197 file << "[\n";
198 bool first = true;
199 for (const auto& sample : samples)
200 {
201 if (!first) { file << ",\n"; }
202 file << " \"" << detail::escape_json(serializer(sample)) << "\"";
203 first = false;
204 }
205 file << "\n]\n";
206 }
207 else { throw std::runtime_error("Error: Unsupported file extension " + ext.string()); }
208}
209
210template <std::ranges::range Container>
211requires detail::StreamInsertable<std::ranges::range_value_t<Container>>
212void save_samples(const Container& samples, const std::filesystem::path& filename)
213{
215}
216
217} // namespace sra
Definition sample_utilities.hpp:33
Definition sample_utilities.hpp:37
Definition sample_utilities.hpp:27
std::string escape_json(const std::string &s) noexcept
Definition sample_utilities.hpp:40
Definition runtime_analyzer.hpp:16
size_t round_to(size_t value, size_t multiple) noexcept
Definition sample_utilities.hpp:73
std::string serialize_iterable(const Iterable &container)
Definition sample_utilities.hpp:164
std::vector< std::vector< T > > generate_samples(F &&filler, const std::vector< size_t > &sizes)
Definition sample_utilities.hpp:147
std::vector< size_t > generate_sizes(size_t sample_count, size_t max_sample_size, const SampleSizeConfig &config={})
Definition sample_utilities.hpp:80
void save_samples(const Container &samples, Serializer &&serializer, const std::filesystem::path &filename)
Definition sample_utilities.hpp:178
Definition sample_utilities.hpp:68
size_t round_to
Definition sample_utilities.hpp:69
double bias
Definition sample_utilities.hpp:70