Есть программа.
#include <sys/mman.h>
#include <unistd.h>

#include <array>
#include <cassert>
#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <limits>
#include <string_view>
#include <system_error>
#include <utility>
#include <vector>

#include <boost/hana.hpp>
namespace hana = boost::hana;
namespace fs = std::filesystem;
using namespace hana::literals;
using sv = std::string_view;
using hana::_;
// Complement of a single nucleotide code.
// Either letter case is accepted and the result is always upper case;
// every byte that is not one of the listed codes yields 0.
constexpr uint8_t swmap(uint8_t c) {
    // Fold lower-case ASCII letters onto upper case so each code appears once below.
    const uint8_t code = (c >= 'a' && c <= 'z') ? static_cast<uint8_t>(c - ('a' - 'A')) : c;
    switch(code) {
        // plain bases
        case 'A': return 'T';
        case 'C': return 'G';
        case 'G': return 'C';
        case 'T': return 'A';
        case 'U': return 'A';
        // two-letter ambiguity codes
        case 'M': return 'K';
        case 'K': return 'M';
        case 'R': return 'Y';
        case 'Y': return 'R';
        case 'W': return 'W';
        case 'S': return 'S';
        // three-letter ambiguity codes
        case 'V': return 'B';
        case 'B': return 'V';
        case 'H': return 'D';
        case 'D': return 'H';
        // "any"
        case 'N': return 'N';
        default: return 0;
    }
}
// Compile-time lookup table that complements and swaps two bytes at once.
// The index is a 16-bit value whose bytes are (hi, lo); the entry stores
// swmap(lo) in its high byte and swmap(hi) in its low byte.
// The table has one entry for every possible 16-bit index (256 * 256), so a
// lookup with any uint16_t stays in bounds.
constexpr auto map = ([] {
    constexpr size_t byte_values = size_t{std::numeric_limits<uint8_t>::max()} + 1;
    std::array<uint16_t, byte_values * byte_values> map{};
    for(size_t it = 0; it < map.size(); ++it) {
        const auto hi = static_cast<uint8_t>(it >> 8);
        const auto lo = static_cast<uint8_t>(it);
        map[it] = static_cast<uint16_t>((swmap(lo) << 8) | swmap(hi));
    }
    return map;
})();
// Compile-time single-byte lookup table: map256[b] == swmap(b).
// It holds 256 entries so that every uint8_t value, including 0xFF, is a
// valid index.
constexpr auto map256 = ([] {
    constexpr size_t byte_values = size_t{std::numeric_limits<uint8_t>::max()} + 1;
    std::array<uint8_t, byte_values> map{};
    for(size_t it = 0; it < map.size(); ++it)
        map[it] = swmap(static_cast<uint8_t>(it));
    return map;
})();
// Produces one 61-byte output line from the 61 input bytes that end just
// before `in`, walking the input backwards.
//
// Layout consumed (from `in` towards lower addresses):
//   (60 - noffset) sequence bytes, one line-break byte that is skipped,
//   then noffset more sequence bytes.
// Layout produced at `out`: the 60 translated bytes in the order they were
// read, followed by '\n'.
//
// `in`  - points one past the last byte to consume; must have 61 readable
//         bytes before it.
// `out` - must have room for 61 bytes.
// Pairs are translated through `map`, single bytes through `map256`.
template<size_t noffset> void replace60(const char * in, char * out) {
    constexpr auto offset = hana::llong_c<noffset>;
    // Translate two bytes at once. memcpy is used for the 16-bit load/store
    // because the char buffers carry no alignment or aliasing guarantee for
    // uint16_t access.
    auto op = [&] {
        in -= 2;
        uint16_t pair;
        std::memcpy(&pair, in, sizeof(pair));
        const uint16_t swapped = map[pair];
        std::memcpy(out, &swapped, sizeof(swapped));
        out += 2;
    };
    // Translate a single byte; the cast keeps the index non-negative when
    // plain char is signed.
    auto one = [&] {
        *out++ = map256[static_cast<uint8_t>(*(--in))];
    };
    // Whole pairs that fit before the line break.
    auto tail_size = ((60_c - offset) / 2_c);
    tail_size.times(op);
    if constexpr(offset % 2_c) {
        // Odd split: one byte is left on each side of the line break.
        // ...1\n
        // 0...
        one();
        --in; // step over the line break
        one();
        (29_c - tail_size).times(op);
    } else {
        // Even split: the line break sits exactly between two pairs.
        // ...\n
        // ...
        in -= 1; // step over the line break
        (30_c - tail_size).times(op);
    }
    *(out++) = '\n';
}
// Picks the replace60<N> instantiation that matches the length of the final
// line of `in`.
//
// `in` must end with '\n'. With `diff` being the distance from the previous
// line break (or from the start of `in` when there is none) to the final
// one, the result is replace60<61 - diff>.
// std::array::at throws std::out_of_range when 61 - diff is not in 0..59.
auto select_replace60 = [](std::string_view in) {
    // Table of all 60 instantiations, built once.
    constexpr static auto replace60_map = ([] {
        std::array<decltype(replace60<0>) *, 60> map{};
        (60_c).times.with_index([&](auto index) {
            map[index()] = replace60<index()>;
        });
        return map;
    })();
    auto first_pos = size(in) - 1;
    assert(in.at(first_pos) == '\n');
    const auto prev_break = in.find_last_of('\n', first_pos - 1);
    // A record that consists of a single line has no earlier line break; the
    // final line then starts at the beginning of `in`. Handling npos
    // explicitly keeps the sanity check below from calling at(npos).
    const auto diff = (prev_break == std::string_view::npos) ? size(in) : first_pos - prev_break;
    assert(prev_break == std::string_view::npos || in.at(prev_break) == '\n');
    return replace60_map.at(61 - diff);
};
// Writes `size` bytes starting at `data` to `fd`, resuming after short writes
// and EINTR. Any other failure stops the transfer; like the unchecked write
// it replaces, no error is reported to the caller.
static void write_all(int fd, const char * data, size_t size) {
    while(size != 0) {
        const ssize_t written = write(fd, data, size);
        if(written < 0 && errno == EINTR) continue;
        if(written <= 0) return;
        data += written;
        size -= static_cast<size_t>(written);
    }
}

// Writes the reverse complement of `data` to standard output in lines of
// 60 characters.
//
// `data` is one record body: sequence bytes with line breaks, ending in '\n'.
// It is walked from the end in 61-byte steps using the replace60 variant
// chosen by select_replace60; the bytes left at the front that do not fill a
// whole step are translated one at a time into a final shorter line.
void replace(sv data) {
    auto op = select_replace60(data);
    constexpr size_t line_size = 61;
    constexpr size_t buff_size = line_size * 1024;
    char buff[buff_size] = {};
    auto n = size(data) / line_size;
    auto tail = size(data) - (n * line_size);
    // `it` always rests on the last byte that has already been consumed;
    // initially that is the trailing line break.
    auto it = end(data) - 1;
    auto buff_it = std::begin(buff);
    while(n--) {
        op(it, buff_it);
        buff_it += line_size;
        it -= line_size;
        // Flush while one line of space is still free, so the tail below
        // always fits.
        if(buff_it == (std::end(buff) - line_size)) {
            write_all(STDOUT_FILENO, buff, buff_size - line_size);
            buff_it = buff;
        }
    }
    if(tail) {
        // Only tail - 1 bytes remain in front of `it`; iterating `tail`
        // times would read the byte before the start of `data`.
        for(auto left = tail - 1; left != 0; --left) {
            const char c = *(--it);
            if(c == '\n') continue;
            // The cast keeps the index non-negative when plain char is signed.
            *buff_it++ = map256[static_cast<uint8_t>(c)];
        }
        *buff_it++ = '\n';
    }
    write_all(STDOUT_FILENO, buff, static_cast<size_t>(buff_it - std::begin(buff)));
}
// Runs `f` once and prints its wall-clock duration and throughput to stderr.
//
// `name`    - label printed in front of the measurement; it does not have to
//             be NUL-terminated.
// `f`       - callable invoked exactly once with no arguments.
// `setsize` - number of bytes processed by `f`; throughput is reported as
//             setsize / 2^30 per second.
template<typename F> [[gnu::noinline]] void bench(std::string_view name, F && f, size_t setsize) {
    const auto start = std::chrono::high_resolution_clock::now();
    f();
    const auto elapsed = std::chrono::high_resolution_clock::now() - start;
    const auto seconds = std::chrono::duration<double>{elapsed}.count();
    const auto gib = static_cast<double>(setsize) / static_cast<double>(1ul << 30);
    // %.*s prints exactly name.size() characters instead of scanning for a
    // terminator that a string_view is not required to have.
    std::fprintf(stderr, "%.*s: %fsec, %fGB/s\n", static_cast<int>(name.size()), name.data(), seconds, gib / seconds);
}
int main() {
fs::path path{"/dev/stdin"};
auto size = fs::file_size(path);
auto data = (const char *)mmap(nullptr, size + 4096, PROT_READ, MAP_PRIVATE|MAP_POPULATE, STDIN_FILENO, 0);
sv file{data, size};
auto next = [=, prev = 0ul]() mutable -> std::pair<sv, sv> {
auto arrow_pos = file.find_first_of('>', prev);
auto begin_pos = file.find_first_of('\n', arrow_pos);
if(begin_pos == sv::npos) return {};
prev = file.find_first_of('>', begin_pos);
return {file.substr(arrow_pos, begin_pos - arrow_pos + 1), file.substr(begin_pos + 1, prev - begin_pos - 1)};
};
std::vector<std::pair<sv, sv>> index;
for(auto pair = next(); pair != std::pair<sv, sv>{}; pair = next()) index.emplace_back(pair);
for(auto [name, data]: index) {
write(STDOUT_FILENO, std::data(name), std::size(name));
bench("replace", [data = data] { replace(data); }, data.size());
};
}
Есть вторая программа: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/revcomp-g...
Нужно сравнить их. Первую программу нужно собирать так (требуются Boost и GCC 8.3 или новее):
g++ -Ofast -march=native -fwhole-program -std=gnu++2a -lstdc++fs main.cpp -o prog_name
Чтобы провести бенчмарк, нужно взять этот код: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/fasta-gcc... и записать его в fasta.c.
Собрать так:
gcc -pipe -Wall -O3 -fomit-frame-pointer -march=native -fopenmp fasta.c -o fasta
Далее нужно сделать следующее:
./fasta 100000000 > fasta_100000000.txt
time ./первая_программа 0 < fasta_100000000.txt > /dev/null
time ./вторая_программа 0 < fasta_100000000.txt > /dev/null
Запустить надо по 2-3 раза. Результаты напечатать сюда.