Code hinzugefügt
This commit is contained in:
parent
c2405e1175
commit
e4d4eb3f30
3 changed files with 315 additions and 0 deletions
15
CMakeLists.txt
Normal file
15
CMakeLists.txt
Normal file
|
@ -0,0 +1,15 @@
|
|||
# The FindSQLite3 module (and its SQLite::SQLite3 imported target) was only
# added in CMake 3.14, so older versions cannot configure this project.
cmake_minimum_required(VERSION 3.14)

project(TracExtractor VERSION 1.0.0 LANGUAGES CXX)

find_package(OpenSSL REQUIRED)
find_package(SQLite3 REQUIRED)
# extract.cpp runs the pandoc conversions on std::thread workers.
find_package(Threads REQUIRED)

add_executable(TracExtractor
    extract.cpp
)

# extract.cpp uses std::filesystem and structured bindings (C++17).
target_compile_features(TracExtractor PRIVATE cxx_std_17)

target_link_libraries(TracExtractor PRIVATE
    OpenSSL::Crypto
    SQLite::SQLite3
    Threads::Threads
)
|
|
@ -0,0 +1,6 @@
|
|||
# Trac-Export
|
||||
|
||||
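Es wird eine Kopie der Trac-Datenbanken benötigt. Die ausführbare Datei wird mit dem Pfad zum Trac-Ordner und dem Namen des Sub-Tracs aufgerufen, z. B. `TracExtractor /pfad/zu/trac projekt`. Die Ausgabe wird im aktuellen Arbeitsverzeichnis unter `projekt/wiki` und `projekt/tickets` abgelegt.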
|
||||
Benötigt werden pandoc, sqlite und OpenSSL.
|
||||
|
||||
Der Code hier drin ist nicht premium (an einem Tag geschrieben), aber er reicht aus, um die Daten als lesbares Markdown zusammen mit den Anhängen aus der Datenbank zu bekommen.
|
294
extract.cpp
Normal file
294
extract.cpp
Normal file
|
@ -0,0 +1,294 @@
|
|||
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <functional>
#include <future>
#include <iomanip>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <vector>

#include <sqlite3.h>
#include <openssl/evp.h>
#include <openssl/types.h>
|
||||
|
||||
|
||||
/// One wiki page as read from the `wiki` table (columns name, version, text).
struct Data {
    /// Page name; may contain '/' separators for nested pages.
    std::string id;
    /// Revision number of the stored content; 0 until a row has been stored.
    uint32_t version{0};
    /// Raw wiki markup of that revision.
    std::string content;
};
|
||||
|
||||
|
||||
/// One row of the `attachment` table (columns id, filename).
struct Datb {
    /// Identifier of the resource the attachment belongs to (the `id` column).
    std::string page{};
    /// Original file name of the attachment.
    std::string filename{};
};
|
||||
|
||||
|
||||
/// One row of the `ticket` table (columns id, component, summary, description, status).
struct Datc {
    /// Ticket number. Zero-initialized so a default-constructed Datc never
    /// carries an indeterminate value (matches Data::version).
    uint32_t id = 0;
    /// Component the ticket is filed under.
    std::string component;
    /// One-line ticket title.
    std::string summary;
    /// Ticket body in wiki markup.
    std::string description;
    /// Workflow state stored in the `status` column.
    std::string status;
};
|
||||
|
||||
|
||||
|
||||
class Database {
|
||||
|
||||
private:
|
||||
sqlite3* m_db;
|
||||
|
||||
public:
|
||||
Database(std::string const& filepath);
|
||||
~Database() noexcept;
|
||||
|
||||
public:
|
||||
std::unordered_map<std::string, Data> query_wiki();
|
||||
std::vector<Datb> query_attachments(std::string const& type);
|
||||
std::vector<Datc> query_tickets();
|
||||
|
||||
private:
|
||||
void check(int rc);
|
||||
};
|
||||
|
||||
|
||||
/// Opens the SQLite database at `filepath`.
///
/// @param filepath Path of the database file.
/// @throws std::runtime_error if SQLite reports an error while opening.
Database::Database(std::string const& filepath)
    : m_db(nullptr)
{
    int const rc = ::sqlite3_open(filepath.c_str(), &m_db);
    if (rc != SQLITE_OK) {
        // sqlite3_open() normally hands back a handle even on failure. It holds
        // the error text and must be closed here: the destructor does not run
        // when the constructor throws, so the handle would leak otherwise.
        std::string const msg = std::to_string(rc) + ", msg: " + ::sqlite3_errmsg(m_db) + ", err: " + ::sqlite3_errstr(rc);
        ::sqlite3_close(m_db);
        m_db = nullptr;
        throw std::runtime_error(msg);
    }
}
|
||||
|
||||
/// Closes the connection.
///
/// Errors are only reported on stderr: the destructor is `noexcept`, so
/// throwing here would call std::terminate.
Database::~Database() noexcept
{
    int const rc = ::sqlite3_close(m_db);
    if (rc != SQLITE_OK) {
        std::fprintf(stderr, "sqlite3_close failed: %d, err: %s\n", rc, ::sqlite3_errstr(rc));
    }
}
|
||||
|
||||
std::unordered_map<std::string, Data> Database::query_wiki()
|
||||
{
|
||||
sqlite3_stmt * stmt;
|
||||
char const * tail = 0;
|
||||
|
||||
std::unordered_map<std::string, Data> data;
|
||||
|
||||
this->check(::sqlite3_prepare_v2(m_db, "SELECT name, version, text FROM wiki", -1, &stmt, &tail));
|
||||
int sqlrc = ::sqlite3_step(stmt);
|
||||
while (sqlrc == SQLITE_ROW) {
|
||||
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
|
||||
uint32_t version = ::sqlite3_column_int64(stmt, 1);
|
||||
std::string content = (char const*)::sqlite3_column_text(stmt, 2);
|
||||
if (data[id].version < version) {
|
||||
data[id].id = id;
|
||||
data[id].version = version;
|
||||
data[id].content = content;
|
||||
}
|
||||
sqlrc = ::sqlite3_step(stmt);
|
||||
}
|
||||
this->check(sqlrc);
|
||||
this->check(::sqlite3_finalize(stmt));
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
std::vector<Datb> Database::query_attachments(std::string const& type)
|
||||
{
|
||||
sqlite3_stmt * stmt;
|
||||
char const * tail = 0;
|
||||
|
||||
std::vector<Datb> data;
|
||||
|
||||
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, filename FROM attachment WHERE type = (?)", -1, &stmt, &tail));
|
||||
this->check(::sqlite3_bind_text(stmt, 1, type.c_str(), -1, SQLITE_TRANSIENT));
|
||||
int sqlrc = ::sqlite3_step(stmt);
|
||||
while (sqlrc == SQLITE_ROW) {
|
||||
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
|
||||
std::string filename = (char const*)::sqlite3_column_text(stmt, 1);
|
||||
Datb dat;
|
||||
dat.page = id;
|
||||
dat.filename = filename;
|
||||
data.push_back(dat);
|
||||
sqlrc = ::sqlite3_step(stmt);
|
||||
}
|
||||
this->check(sqlrc);
|
||||
this->check(::sqlite3_finalize(stmt));
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
std::vector<Datc> Database::query_tickets()
|
||||
{
|
||||
sqlite3_stmt * stmt;
|
||||
char const * tail = 0;
|
||||
|
||||
std::vector<Datc> data;
|
||||
|
||||
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, component, summary, description, status FROM ticket", -1, &stmt, &tail));
|
||||
int sqlrc = ::sqlite3_step(stmt);
|
||||
while (sqlrc == SQLITE_ROW) {
|
||||
Datc dat;
|
||||
dat.id = ::sqlite3_column_int64(stmt, 0);
|
||||
dat.component = (char const*)::sqlite3_column_text(stmt, 1);
|
||||
dat.summary = (char const*)::sqlite3_column_text(stmt, 2);
|
||||
dat.description = (char const*)::sqlite3_column_text(stmt, 3);
|
||||
dat.status = (char const*)::sqlite3_column_text(stmt, 4);
|
||||
data.push_back(dat);
|
||||
|
||||
sqlrc = ::sqlite3_step(stmt);
|
||||
}
|
||||
this->check(sqlrc);
|
||||
this->check(::sqlite3_finalize(stmt));
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
void Database::check(int rc)
|
||||
{
|
||||
if (rc != SQLITE_ROW && rc != SQLITE_DONE && rc != SQLITE_OK) {
|
||||
std::string msg = std::to_string(rc) + ", msg: " + sqlite3_errmsg(m_db) + ", err: " + sqlite3_errstr(rc);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::string sha1(std::string const& data)
|
||||
{
|
||||
EVP_MD_CTX* ctx = ::EVP_MD_CTX_new();
|
||||
::EVP_DigestInit_ex(ctx, ::EVP_sha1(), nullptr);
|
||||
|
||||
::EVP_DigestUpdate(ctx, data.data(), data.size());
|
||||
uint8_t hash[EVP_MAX_MD_SIZE];
|
||||
unsigned int size = 0;
|
||||
::EVP_DigestFinal(ctx, hash, &size);
|
||||
|
||||
std::stringstream ss;
|
||||
for (uint32_t i=0; i<size; ++i) {
|
||||
ss << std::hex << std::setw(2) << std::setfill('0') << static_cast<uint32_t>(hash[i]);
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/// Quotes `arg` so that a POSIX shell passes it on as exactly one literal word.
/// The text is wrapped in single quotes; an embedded single quote becomes `'\''`.
static std::string shell_quote(std::string const& arg)
{
    std::string quoted = "'";
    for (char const c : arg) {
        if (c == '\'') {
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

/// Converts the file at `path` to Markdown by running the external `pandoc`
/// program; the result is written next to the input with the extension `.md`.
///
/// @param path File to convert.
///
/// File names are derived from database content, so both paths are
/// shell-quoted rather than merely wrapped in double quotes: a name containing
/// `"`, `$` or a backtick would otherwise break or alter the command.
/// NOTE: the quoting assumes std::system() runs a POSIX shell.
void pandoc(std::filesystem::path path)
{
    auto out = path;
    out.replace_extension("md");
    std::printf("%s -> %s\n", path.string().c_str(), out.string().c_str());

    std::string const cmd = "pandoc -f mediawiki -t markdown -o " + shell_quote(out.string()) + " " + shell_quote(path.string());
    int const rc = std::system(cmd.c_str());
    if (rc != 0) {
        // Keep going with the remaining files, but make the failure visible.
        std::fprintf(stderr, "pandoc failed (%d) for %s\n", rc, path.string().c_str());
    }
}
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc < 3) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string trac = argv[1];
|
||||
std::string subfolder = argv[2];
|
||||
std::filesystem::path trac_dir = std::filesystem::absolute(trac);
|
||||
auto work_dir = std::filesystem::current_path();
|
||||
auto wiki_output = work_dir / subfolder / "wiki";
|
||||
auto tickets_output = work_dir / subfolder / "tickets";
|
||||
|
||||
std::vector<std::packaged_task<void()>> tasks;
|
||||
|
||||
auto db_path = trac_dir / subfolder / "trac.db";
|
||||
Database db(db_path.string());
|
||||
auto wdata = db.query_wiki();
|
||||
std::printf("%ld wiki pages\n", wdata.size());
|
||||
|
||||
std::filesystem::create_directories(wiki_output);
|
||||
std::filesystem::current_path(wiki_output);
|
||||
|
||||
for (auto const& [id, dat] : wdata) {
|
||||
std::filesystem::path path(id + ".wiki");
|
||||
if (!path.parent_path().empty()) {
|
||||
std::filesystem::create_directories(path.parent_path());
|
||||
}
|
||||
|
||||
std::ofstream file(path);
|
||||
if (!file.is_open()) {
|
||||
std::printf("Could not open file: %s\n", id.c_str());
|
||||
break;
|
||||
}
|
||||
file.write(dat.content.c_str(), dat.content.size());
|
||||
file.close();
|
||||
|
||||
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
|
||||
}
|
||||
wdata = {};
|
||||
|
||||
auto wiki_files_dir = trac_dir / subfolder / "files" / "attachments" / "wiki";
|
||||
auto wadata = db.query_attachments("wiki");
|
||||
std::printf("%ld attachments\n", wadata.size());
|
||||
for (auto const& dat : wadata) {
|
||||
auto page_hash = sha1(dat.page);
|
||||
std::string group_dir = page_hash.substr(0, 3);
|
||||
auto file_hash = sha1(dat.filename);
|
||||
auto origin = wiki_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
|
||||
std::filesystem::path path(dat.page);
|
||||
path = path.parent_path();
|
||||
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
|
||||
std::error_code ec;
|
||||
std::filesystem::copy(origin, target, ec);
|
||||
}
|
||||
wadata = {};
|
||||
|
||||
std::filesystem::create_directories(tickets_output);
|
||||
std::filesystem::current_path(tickets_output);
|
||||
|
||||
auto tdata = db.query_tickets();
|
||||
std::printf("%ld tickets\n", tdata.size());
|
||||
for (auto const& dat : tdata) {
|
||||
std::filesystem::create_directories(dat.component);
|
||||
|
||||
auto const path = std::filesystem::path(dat.component) / (std::to_string(dat.id) + ".wiki");
|
||||
std::ofstream file(path);
|
||||
if (!file.is_open()) {
|
||||
std::printf("Could not open file: %s\n", path.c_str());
|
||||
break;
|
||||
}
|
||||
file << "== " << dat.summary << " ==" << std::endl;
|
||||
file << std::endl;
|
||||
file << "'''Status:''' " << dat.status << std::endl;
|
||||
file << std::endl;
|
||||
file << dat.description << std::endl;
|
||||
file.close();
|
||||
|
||||
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
|
||||
}
|
||||
tdata = {};
|
||||
|
||||
auto ticket_files_dir = trac_dir / subfolder / "files" / "attachments" / "ticket";
|
||||
auto tadata = db.query_attachments("ticket");
|
||||
std::printf("%ld attachments\n", tadata.size());
|
||||
for (auto const& dat : tadata) {
|
||||
auto page_hash = sha1(dat.page);
|
||||
std::string group_dir = page_hash.substr(0, 3);
|
||||
auto file_hash = sha1(dat.filename);
|
||||
auto origin = ticket_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
|
||||
std::filesystem::path path(dat.page);
|
||||
path = path.parent_path();
|
||||
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
|
||||
std::error_code ec;
|
||||
std::filesystem::copy(origin, target, ec);
|
||||
}
|
||||
tadata = {};
|
||||
|
||||
uint32_t nthreads = std::thread::hardware_concurrency();
|
||||
std::vector<std::thread> threads(nthreads);
|
||||
for (uint32_t i=0; i<nthreads; ++i) {
|
||||
threads[i] = std::thread([i, nthreads, &tasks]{
|
||||
for (uint32_t j=i; j<tasks.size(); j+=nthreads) {
|
||||
tasks[j]();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (uint32_t i=0; i<nthreads; ++i) {
|
||||
if (threads[i].joinable()) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue