Skip to content

Commit aeefdcd

Browse files
committed
Store dense vectors using binsparse
1 parent 7898a21 commit aeefdcd

File tree

4 files changed

+219
-28
lines changed

4 files changed

+219
-28
lines changed

examples/convert_binsparse.cpp

Lines changed: 75 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,49 @@ void convert_to_binsparse(std::string input_file, std::string output_file,
6767
}
6868
}
6969

70+
template <typename T>
71+
void convert_to_binsparse_vector(std::string input_file,
72+
std::string output_file, std::string type,
73+
std::string comment,
74+
std::optional<std::string> group) {
75+
H5::H5File file;
76+
std::unique_ptr<H5::Group> f_p;
77+
78+
if (!group.has_value()) {
79+
f_p = std::unique_ptr<H5::Group>(
80+
new H5::H5File(output_file.c_str(), H5F_ACC_TRUNC));
81+
} else {
82+
file = H5::H5File(output_file.c_str(), H5F_ACC_RDWR);
83+
H5::Group g = file.createGroup(group.value().c_str());
84+
f_p = std::unique_ptr<H5::Group>(new H5::Group(g));
85+
}
86+
87+
H5::Group& f = *f_p;
88+
89+
nlohmann::json user_keys;
90+
user_keys["comment"] = comment;
91+
92+
auto x = binsparse::__detail::mmread_array<float>(input_file);
93+
binsparse::write_dense_vector(f, std::span(x), user_keys);
94+
std::cout << "Writing to binsparse file " << output_file << " as vector"
95+
<< std::endl;
96+
}
97+
98+
inline void convert_to_binsparse_vector(std::string input_file,
99+
std::string output_file,
100+
std::string type, std::string comment,
101+
std::optional<std::string> group = {}) {
102+
if (type == "real") {
103+
convert_to_binsparse_vector<float>(input_file, output_file, type, comment,
104+
group);
105+
} else if (type == "integer") {
106+
convert_to_binsparse_vector<int64_t>(input_file, output_file, type, comment,
107+
group);
108+
} else {
109+
throw std::runtime_error("convert_to_binsparse_vector: unsupported type");
110+
}
111+
}
112+
70113
int main(int argc, char** argv) {
71114

72115
if (argc < 3) {
@@ -96,35 +139,41 @@ int main(int argc, char** argv) {
96139
group = argv[4];
97140
}
98141

99-
auto [m, n, nnz, type, structure, comment] =
142+
auto [m, n, nnz, mm_format, type, structure, comment] =
100143
binsparse::mmread_metadata(input_file);
101144

102-
std::cout << "Matrix is " << m << " x " << n << " with " << nnz
103-
<< " values.\n";
104-
std::cout << "Type: " << type << std::endl;
105-
std::cout << "Structure: " << structure << std::endl;
106-
std::cout << "Comment:\n";
107-
std::cout << comment;
108-
109-
assert(format == "COO" || format == "CSR");
110-
111-
auto max_size = std::max({m, n, nnz});
112-
113-
if (max_size + 1 <= std::numeric_limits<uint8_t>::max()) {
114-
convert_to_binsparse<uint8_t>(input_file, output_file, type, format,
115-
comment, group);
116-
} else if (max_size + 1 <= std::numeric_limits<uint16_t>::max()) {
117-
convert_to_binsparse<uint16_t>(input_file, output_file, type, format,
118-
comment, group);
119-
} else if (max_size + 1 <= std::numeric_limits<uint32_t>::max()) {
120-
convert_to_binsparse<uint32_t>(input_file, output_file, type, format,
121-
comment, group);
122-
} else if (max_size + 1 <= std::numeric_limits<uint64_t>::max()) {
123-
convert_to_binsparse<uint64_t>(input_file, output_file, type, format,
124-
comment, group);
145+
if (mm_format == "coordinate") {
146+
std::cout << "Matrix is " << m << " x " << n << " with " << nnz
147+
<< " values.\n";
148+
std::cout << "Type: " << type << std::endl;
149+
std::cout << "Structure: " << structure << std::endl;
150+
std::cout << "Comment:\n";
151+
std::cout << comment;
152+
153+
assert(format == "COO" || format == "CSR");
154+
155+
auto max_size = std::max({m, n, nnz});
156+
157+
if (max_size + 1 <= std::numeric_limits<uint8_t>::max()) {
158+
convert_to_binsparse<uint8_t>(input_file, output_file, type, format,
159+
comment, group);
160+
} else if (max_size + 1 <= std::numeric_limits<uint16_t>::max()) {
161+
convert_to_binsparse<uint16_t>(input_file, output_file, type, format,
162+
comment, group);
163+
} else if (max_size + 1 <= std::numeric_limits<uint32_t>::max()) {
164+
convert_to_binsparse<uint32_t>(input_file, output_file, type, format,
165+
comment, group);
166+
} else if (max_size + 1 <= std::numeric_limits<uint64_t>::max()) {
167+
convert_to_binsparse<uint64_t>(input_file, output_file, type, format,
168+
comment, group);
169+
} else {
170+
throw std::runtime_error(
171+
"Error! Matrix dimensions or NNZ too large to handle.");
172+
}
173+
} else if (mm_format == "array" && n == 1) {
174+
convert_to_binsparse_vector(input_file, output_file, type, comment, group);
125175
} else {
126-
throw std::runtime_error(
127-
"Error! Matrix dimensions or NNZ too large to handle.");
176+
throw std::runtime_error("Encountered unsupported MatrixMarket format");
128177
}
129178

130179
return 0;

include/binsparse/binsparse.hpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,53 @@ namespace binsparse {
1515

1616
inline constexpr double version = 0.1;
1717

18+
template <typename T>
19+
void write_dense_vector(H5::Group& f, std::span<T> v,
20+
nlohmann::json user_keys = {}) {
21+
hdf5_tools::write_dataset(f, "values", v);
22+
23+
using json = nlohmann::json;
24+
json j;
25+
j["binsparse"]["version"] = version;
26+
j["binsparse"]["format"] = "DVEC";
27+
j["binsparse"]["shape"] = {v.size()};
28+
j["binsparse"]["nnz"] = v.size();
29+
j["binsparse"]["data_types"]["values"] = type_info<T>::label();
30+
31+
for (auto&& v : user_keys.items()) {
32+
j[v.key()] = v.value();
33+
}
34+
35+
hdf5_tools::set_attribute(f, "binsparse", j.dump(2));
36+
}
37+
38+
template <typename T, typename Allocator = std::allocator<T>>
39+
auto read_dense_vector(std::string fname, Allocator&& alloc = Allocator{}) {
40+
H5::H5File f(fname.c_str(), H5F_ACC_RDWR);
41+
42+
auto metadata = hdf5_tools::get_attribute(f, "binsparse");
43+
44+
using json = nlohmann::json;
45+
auto data = json::parse(metadata);
46+
47+
auto binsparse_metadata = data["binsparse"];
48+
49+
auto format = __detail::unalias_format(binsparse_metadata["format"]);
50+
51+
assert(format == "DVEC");
52+
53+
auto nvalues = binsparse_metadata["shape"][0];
54+
auto nnz = binsparse_metadata["nnz"];
55+
56+
assert(nvalues == nnz);
57+
58+
auto values = hdf5_tools::read_dataset<T>(f, "values", alloc);
59+
60+
assert(values.size() == nvalues);
61+
62+
return values;
63+
}
64+
1865
// Dense Format
1966

2067
template <typename T, typename I, typename Order>

include/binsparse/matrix_market/matrix_market_inspector.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ inline auto mmread_metadata(std::string file_path) {
5151

5252
// Read in coordinate / array
5353
ss >> item;
54+
std::string format = item;
5455

5556
// Read in type of matrix (real / integer / complex / pattern)
5657
ss >> item;
@@ -77,9 +78,14 @@ inline auto mmread_metadata(std::string file_path) {
7778

7879
ss.clear();
7980
ss.str(buf);
80-
ss >> m >> n >> nnz;
81+
ss >> m >> n;
82+
if (format == "coordinate") {
83+
ss >> nnz;
84+
} else {
85+
nnz = m * n;
86+
}
8187

82-
return std::tuple(m, n, nnz, type, structure, comment);
88+
return std::tuple(m, n, nnz, format, type, structure, comment);
8389
}
8490

8591
} // namespace binsparse

include/binsparse/matrix_market/matrix_market_read.hpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,95 @@ inline MatrixType mmread(std::string file_path, bool one_indexed = true) {
294294
return m_out;
295295
}
296296

297+
/// Read the dense Matrix Market `array` file at `file_path` and return its
/// values as a flat vector of `m * n` elements.
///
/// The file lists values one per line in column-major order; the returned
/// vector is laid out row-major, i.e. element (i, j) is at index `i * n + j`.
/// Blank lines and, before the size line, lines starting with '%' are
/// skipped.
///
/// @tparam T           Element type each value is parsed as.
/// @param file_path    Path of the Matrix Market file.
/// @param one_indexed  Unused: array files carry no explicit indices. Kept so
///                     the signature stays parallel to `mmread`.
/// @return Vector of `m * n` values in row-major order.
/// @throws std::runtime_error if the file cannot be opened, the banner is not
///         `%%MatrixMarket matrix array`, the size line is missing or
///         malformed, a value cannot be parsed, or the number of values
///         differs from `m * n`.
template <typename T>
inline std::vector<T> mmread_array(std::string file_path,
                                   bool one_indexed = true) {
  using size_type = std::size_t;
  static_cast<void>(one_indexed);

  const auto is_blank = [](const std::string& line) {
    return line.find_first_not_of(" \t\r\n\v\f") == std::string::npos;
  };

  std::ifstream f(file_path.c_str());

  if (!f.is_open()) {
    // TODO better choice of exception.
    throw std::runtime_error("mmread: cannot open " + file_path);
  }

  std::string buf;

  // Banner line: "%%MatrixMarket matrix array <field> <symmetry>".
  std::getline(f, buf);
  std::istringstream ss(buf);
  std::string item;
  ss >> item;
  if (item != "%%MatrixMarket") {
    throw std::runtime_error(file_path +
                             " could not be parsed as a Matrix Market file.");
  }
  ss >> item;
  if (item != "matrix") {
    throw std::runtime_error(file_path +
                             " could not be parsed as a Matrix Market file.");
  }
  ss >> item;
  if (item != "array") {
    throw std::runtime_error(file_path +
                             " could not be parsed as a Matrix Market file.");
  }
  // Field: pattern files have no values to read.
  ss >> item;
  assert(item != "pattern");

  // Symmetry: only general storage is handled.
  ss >> item;
  assert(item == "general");

  // Advance to the size line. The stream state is checked on every read so a
  // file that ends inside the comment block cannot loop forever.
  bool found_size_line = false;
  while (std::getline(f, buf)) {
    if (is_blank(buf) || buf[0] == '%') {
      continue;
    }
    found_size_line = true;
    break;
  }
  if (!found_size_line) {
    throw std::runtime_error(file_path +
                             " could not be parsed as a Matrix Market file.");
  }

  size_type m = 0;
  size_type n = 0;
  ss.clear();
  ss.str(buf);
  if (!(ss >> m >> n)) {
    throw std::runtime_error(file_path +
                             " could not be parsed as a Matrix Market file.");
  }
  const size_type nnz = m * n;

  std::vector<T> m_out(nnz);

  size_type c = 0;
  while (std::getline(f, buf)) {
    if (is_blank(buf)) {
      continue;
    }

    // Check before storing: an extra value must never be written past the
    // end of (or over an element of) `m_out`.
    if (c >= nnz) {
      throw std::runtime_error("read_MatrixMarket: error reading Matrix Market "
                               "file, file has more nonzeros than reported.");
    }

    T v;
    std::istringstream line(buf);
    if (!(line >> v)) {
      throw std::runtime_error(file_path +
                               " could not be parsed as a Matrix Market file.");
    }

    // File order is column-major; `c < nnz` guarantees `m != 0` here.
    const size_type i = c % m;
    const size_type j = c / m;

    m_out[i * n + j] = v;

    ++c;
  }

  if (c < nnz) {
    throw std::runtime_error("read_MatrixMarket: error reading Matrix Market "
                             "file, file has fewer values than reported.");
  }

  return m_out;
}
385+
297386
} // namespace __detail
298387

299388
} // namespace binsparse

0 commit comments

Comments
 (0)