Skip to content

Commit 90f53ce

Browse files
authored
Merge pull request #53 from distributed-lab/feature/unspend_utxo_indexer
UTXO Indexer
2 parents e806742 + 902a341 commit 90f53ce

File tree

7 files changed

+388
-0
lines changed

7 files changed

+388
-0
lines changed

utxo_indexer/indexer/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,15 @@ name = "indexer"
33
version = "0.1.0"
44
edition = "2024"
55

6+
[[bin]]
7+
name = "indexer"
8+
path = "src/main.rs"
9+
610
[dependencies]
11+
anyhow = "1.0.100"
12+
bincode = "2.0.1"
13+
bitcoin = "0.32.7"
14+
byteorder = "1.5.0"
15+
clap = { version = "4.5.51", features = ["derive"] }
16+
hex = "0.4.3"
17+
rocksdb = "0.24.0"
utxo_indexer/indexer/src/bitcoin_primitives.rs

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
use crate::bitcoin_serialization::{decompress_amount, read_var_int_u32, read_var_int_u64};
2+
3+
use bitcoin::{
4+
Txid, VarInt, consensus,
5+
hashes::Hash,
6+
opcodes::all::{OP_CHECKSIG, OP_DUP, OP_EQUALVERIFY, OP_HASH160},
7+
};
8+
9+
#[allow(unused)]
10+
#[derive(Debug)]
11+
pub struct CoinKey {
12+
pub txid: Txid,
13+
pub vout: VarInt,
14+
}
15+
16+
impl CoinKey {
17+
const COIN_DELIMITER: u8 = 0x43; // 'C'
18+
19+
const MINIMUM_KEY_SIZE: usize = 1 + 32 + 1; // 1 byte prefix + 32 bytes txid + at least 1 byte vout
20+
21+
pub fn deserialize(data: &[u8]) -> Option<Self> {
22+
if data.len() < Self::MINIMUM_KEY_SIZE {
23+
return None;
24+
}
25+
26+
let txid = Txid::from_slice(&data[1..33]).ok()?;
27+
let vout = consensus::deserialize(&data[33..]).ok()?;
28+
29+
Some(Self { txid, vout })
30+
}
31+
32+
#[allow(unused)]
33+
pub fn serialize(&self) -> Vec<u8> {
34+
let mut result = Vec::with_capacity(1 + 32 + self.vout.size());
35+
result.push(Self::COIN_DELIMITER);
36+
result.extend_from_slice(self.txid.as_raw_hash().as_byte_array());
37+
result.extend_from_slice(&consensus::serialize(&self.vout));
38+
result
39+
}
40+
}
41+
42+
/// Decoded value of a coin entry, as produced by `CoinValue::deserialize`.
// `height` and `is_coinbase` are decoded but not read by the indexer yet, hence the `allow`.
#[derive(Debug)]
#[allow(unused)]
pub struct CoinValue {
    /// Block height, taken from the upper bits of the entry's leading code.
    pub height: u64,
    /// Coinbase flag, taken from the lowest bit of the entry's leading code.
    pub is_coinbase: bool,
    /// Output amount after decompression.
    pub amount: u64,
    /// Output script rebuilt from the stored compressed form.
    pub script_pubkey: Vec<u8>,
}
50+
51+
impl CoinValue {
52+
const P2PKH_COAT_OF_ARMS: u64 = 0x00;
53+
const P2PKH_SCRIPT_LEN: usize = 25;
54+
const PKH_SIZE: usize = 20;
55+
56+
pub fn deserialize(data: &[u8]) -> Option<Self> {
57+
let mut cursor = 0;
58+
59+
let (code, consumed) = read_var_int_u32(data)?;
60+
61+
cursor += consumed;
62+
63+
let height = code >> 1;
64+
65+
let is_coinbase = (code & 1) == 1;
66+
67+
let (compressed_amount, consumed) = read_var_int_u32(&data[cursor..])?;
68+
cursor += consumed;
69+
70+
let amount = decompress_amount(compressed_amount as u64);
71+
72+
let (script_len, consumed) = read_var_int_u64(&data[cursor..])?;
73+
74+
// TODO: implement the other script types
75+
if Self::P2PKH_COAT_OF_ARMS != script_len {
76+
return None;
77+
}
78+
79+
cursor += consumed;
80+
81+
if data.len() < cursor + Self::PKH_SIZE {
82+
return None;
83+
}
84+
85+
let mut pkh = data[cursor..cursor + Self::PKH_SIZE].to_vec();
86+
87+
let mut script_pubkey = Vec::with_capacity(Self::P2PKH_SCRIPT_LEN);
88+
script_pubkey.push(OP_DUP.to_u8());
89+
script_pubkey.push(OP_HASH160.to_u8());
90+
script_pubkey.push(20);
91+
script_pubkey.append(&mut pkh);
92+
script_pubkey.push(OP_EQUALVERIFY.to_u8());
93+
script_pubkey.push(OP_CHECKSIG.to_u8());
94+
95+
Some(Self {
96+
height: height as u64,
97+
is_coinbase,
98+
amount,
99+
script_pubkey,
100+
})
101+
}
102+
}
103+
104+
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use bitcoin::Txid;

    use super::CoinKey;

    /// A raw coin key decodes into the expected transaction id and output index.
    #[test]
    fn test_coin_key_deserialize() {
        let raw_key =
            hex::decode("435a7b146bcde2a1f879c367fbbbac97a401aef899430182effb998d4ff1452a6d06")
                .unwrap();
        let expected_txid =
            Txid::from_str("6d2a45f14f8d99fbef82014399f8ae01a497acbbfb67c379f8a1e2cd6b147b5a")
                .unwrap();

        let coin_key = CoinKey::deserialize(&raw_key).unwrap();

        assert_eq!(coin_key.txid, expected_txid);
        assert_eq!(coin_key.vout, 6u32.into());
    }
}
utxo_indexer/indexer/src/bitcoin_serialization.rs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/// XORs `data` with `obfuscation_key`, repeating the key as often as needed.
///
/// An empty key leaves the data unchanged; the result is always a fresh vector
/// of the same length as `data`.
pub fn deobfuscate(data: &[u8], obfuscation_key: &[u8]) -> Vec<u8> {
    if obfuscation_key.is_empty() {
        return data.to_vec();
    }

    data.iter()
        .zip(obfuscation_key.iter().cycle())
        .map(|(byte, mask)| byte ^ mask)
        .collect()
}
11+
12+
/// Reads a base-128 variable-length integer from the start of `data`.
///
/// Each byte contributes its low seven bits, most significant group first. A set
/// high bit means another byte follows, in which case the accumulated value is
/// incremented by one before the next group is shifted in.
///
/// Returns the decoded value together with the number of bytes consumed, or
/// `None` when the input ends before a terminating byte or the value does not
/// fit into a `u64`.
pub fn read_var_int_u64(data: &[u8]) -> Option<(u64, usize)> {
    // Largest accumulator that can still be shifted left by seven bits.
    const SHIFT_LIMIT: u64 = u64::MAX >> 7;

    let mut value: u64 = 0;

    for (index, &raw) in data.iter().enumerate() {
        if value > SHIFT_LIMIT {
            return None;
        }

        value = (value << 7) | u64::from(raw & 0x7F);

        let is_last = raw & 0x80 == 0x00;
        if is_last {
            return Some((value, index + 1));
        }

        // Fails exactly when the accumulator is already at its maximum.
        value = value.checked_add(1)?;
    }

    None
}
37+
38+
/// Reads a base-128 variable-length integer from the start of `data` into a `u32`.
///
/// Each byte contributes its low seven bits, most significant group first. A set
/// high bit means another byte follows, in which case the accumulated value is
/// incremented by one before the next group is shifted in.
///
/// Returns the decoded value together with the number of bytes consumed, or
/// `None` when the input ends before a terminating byte or the value does not
/// fit into a `u32`.
pub fn read_var_int_u32(data: &[u8]) -> Option<(u32, usize)> {
    // Largest accumulator that can still be shifted left by seven bits.
    const SHIFT_LIMIT: u32 = u32::MAX >> 7;

    let mut value: u32 = 0;

    for (index, &raw) in data.iter().enumerate() {
        if value > SHIFT_LIMIT {
            return None;
        }

        value = (value << 7) | u32::from(raw & 0x7F);

        let is_last = raw & 0x80 == 0x00;
        if is_last {
            return Some((value, index + 1));
        }

        // Fails exactly when the accumulator is already at its maximum.
        value = value.checked_add(1)?;
    }

    None
}
63+
64+
/// Expands a compressed amount back into its full value.
///
/// `0` maps to `0`. Otherwise `x - 1` is split into a decimal exponent `e`
/// (its last decimal digit) and a remainder:
/// - for `e < 9` the remainder encodes a non-zero last digit `d` (base 9) and
///   the leading digits `n`; the result is `(n * 10 + d) * 10^e`;
/// - for `e == 9` the result is `(remainder + 1) * 10^9`.
///
/// The final multiplication wraps on overflow, so malformed input produces the
/// same value in debug and release builds instead of panicking in debug builds.
pub fn decompress_amount(x: u64) -> u64 {
    if x == 0 {
        return 0;
    }

    let x = x - 1;
    let exponent = (x % 10) as u32; // always in 0..=9
    let rest = x / 10;

    let mantissa = if exponent < 9 {
        let last_digit = rest % 9 + 1;
        (rest / 9) * 10 + last_digit
    } else {
        rest + 1
    };

    // `10^exponent` always fits; only the product can exceed `u64::MAX`.
    mantissa.wrapping_mul(10u64.pow(exponent))
}

utxo_indexer/indexer/src/cli.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use clap::{Parser, Subcommand};
2+
3+
#[derive(Parser)]
4+
#[command(version, about = "Builds UTXO index", long_about = None)]
5+
pub struct Cli {
6+
#[command(subcommand)]
7+
pub command: Commands,
8+
}
9+
10+
#[derive(Subcommand)]
11+
pub enum Commands {
12+
/// Index the chainstate and build the UTXO index
13+
IndexChainstate {
14+
/// Path to the chainstate LevelDB directory
15+
#[arg(short, long)]
16+
chainstate_path: String,
17+
/// Path to output the UTXO index file
18+
#[arg(short, long)]
19+
output_path: String,
20+
},
21+
BuildMerkleRoot {
22+
/// Path to the UTXO index file
23+
#[arg(short, long)]
24+
utxo_index_path: String,
25+
},
26+
}

utxo_indexer/indexer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

utxo_indexer/indexer/src/main.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
mod bitcoin_primitives;
2+
mod bitcoin_serialization;
3+
mod utils;
4+
5+
mod cli;
6+
7+
use anyhow::{Context, Result};
8+
use bitcoin::hashes::Hash;
9+
use clap::Parser;
10+
use rocksdb::{DB, IteratorMode, Options};
11+
12+
use bitcoin::hashes::sha256;
13+
14+
use crate::{
15+
bitcoin_primitives::{CoinKey, CoinValue},
16+
bitcoin_serialization::deobfuscate,
17+
utils::{P2PKH_UTXO_SIZE, load_utxos, save_utxos},
18+
};
19+
20+
/// Database key under which the obfuscation key entry is looked up.
/// The leading `0x0e` byte equals the length (14) of the bytes that follow it.
const OBFUSCATION_KEY_DB_KEY: &[u8] = b"\x0e\x00obfuscate_key";
21+
22+
fn main() -> Result<()> {
23+
let cli = cli::Cli::parse();
24+
25+
match &cli.command {
26+
cli::Commands::IndexChainstate {
27+
chainstate_path,
28+
output_path,
29+
} => run_index_chainstate(chainstate_path.as_str(), output_path.as_str()),
30+
cli::Commands::BuildMerkleRoot { utxo_index_path } => {
31+
run_build_merkle_root(utxo_index_path.as_str())
32+
}
33+
}
34+
}
35+
36+
fn run_index_chainstate(chainstate_path: &str, output_path: &str) -> Result<()> {
37+
let mut opts = Options::default();
38+
opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
39+
opts.create_if_missing(false);
40+
41+
let db = DB::open_for_read_only(&opts, chainstate_path, false)?;
42+
let obfuscation_key_entry = db
43+
.get(OBFUSCATION_KEY_DB_KEY)
44+
.context("DB is not reachable")?
45+
.context("obfuscation key is not present in DB")?;
46+
47+
println!(
48+
"Opened chainstate at {:?}. Using obfuscation key: {}",
49+
chainstate_path,
50+
hex::encode(&obfuscation_key_entry[1..])
51+
);
52+
println!("Parsing UTXO entries...");
53+
54+
let iter = db.iterator(IteratorMode::Start);
55+
56+
let mut p2pkh_count = 0;
57+
58+
let mut utxos: Vec<[u8; P2PKH_UTXO_SIZE]> = Vec::with_capacity(100_000_000); // theoretical max amount of P2PKH UTXO in near future
59+
60+
for (i, item) in iter.enumerate() {
61+
if i % 500_000 == 0 {
62+
println!(
63+
"Processed {} entries. Current P2PKH count: {}",
64+
i, p2pkh_count
65+
);
66+
}
67+
68+
let (key, value) = item.context("DB is not reachable and iterable")?;
69+
70+
let _coin_key = match CoinKey::deserialize(&key) {
71+
Some(ck) => ck,
72+
None => continue,
73+
};
74+
75+
let deobfuscated_value = deobfuscate(&value, &obfuscation_key_entry[1..]);
76+
77+
let coin_value = match CoinValue::deserialize(&deobfuscated_value) {
78+
Some(cv) => cv,
79+
None => continue,
80+
};
81+
82+
p2pkh_count += 1;
83+
84+
let mut utxo = Vec::with_capacity(P2PKH_UTXO_SIZE);
85+
utxo.extend_from_slice(&coin_value.amount.to_le_bytes());
86+
utxo.extend_from_slice(&coin_value.script_pubkey);
87+
88+
utxos.push(utxo.try_into().expect("UTXO size should match"));
89+
}
90+
91+
println!("Total P2PKH UTXO entries: {}", p2pkh_count);
92+
93+
save_utxos(&utxos, output_path)?;
94+
95+
println!("UTXO index saved to {}", output_path);
96+
97+
Ok(())
98+
}
99+
100+
fn run_build_merkle_root(utxo_index_path: &str) -> Result<()> {
101+
println!("Loading UTXO index from {}", utxo_index_path);
102+
103+
let utxos = load_utxos(utxo_index_path)?;
104+
105+
println!("Calculating Merkle root for {} UTXOs", utxos.len());
106+
107+
let mut merkle_tree_leaf_hashes: Vec<sha256::Hash> = Vec::with_capacity(utxos.len());
108+
for utxo in utxos {
109+
let leaf_hash = sha256::Hash::hash(&utxo);
110+
merkle_tree_leaf_hashes.push(leaf_hash);
111+
}
112+
113+
let root = bitcoin::merkle_tree::calculate_root(merkle_tree_leaf_hashes.into_iter())
114+
.expect("UTXO set should not be empty");
115+
116+
println!("Merkle root: {}", hex::encode(root.as_byte_array()));
117+
118+
Ok(())
119+
}

utxo_indexer/indexer/src/utils.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
use std::{
2+
fs::File,
3+
io::{BufReader, BufWriter},
4+
};
5+
6+
use anyhow::Result;
7+
8+
/// Size in bytes of one record of the UTXO index handled by `save_utxos` and `load_utxos`.
pub const P2PKH_UTXO_SIZE: usize = 8 + 25; // 8 bytes for amount, 25 bytes for P2PKH scriptPubKey
9+
10+
/// Writes `utxos` to the file at `path`, creating or truncating it, using
/// bincode's standard configuration.
///
/// # Errors
///
/// Fails when the file cannot be created, when encoding fails, or when the
/// buffered data cannot be flushed to the file.
pub fn save_utxos(utxos: &Vec<[u8; P2PKH_UTXO_SIZE]>, path: &str) -> Result<()> {
    let mut writer = BufWriter::new(File::create(path)?);
    bincode::encode_into_std_write(utxos, &mut writer, bincode::config::standard())?;

    // Dropping a `BufWriter` discards any error raised by its final flush, so
    // flush explicitly and let a failed write surface to the caller.
    std::io::Write::flush(&mut writer)?;

    Ok(())
}
17+
18+
/// Reads a UTXO index from the file at `path`, decoding it with bincode's
/// standard configuration.
///
/// # Errors
///
/// Fails when the file cannot be opened or its contents cannot be decoded.
pub fn load_utxos(path: &str) -> Result<Vec<[u8; P2PKH_UTXO_SIZE]>> {
    let mut reader = File::open(path).map(BufReader::new)?;

    Ok(bincode::decode_from_std_read(
        &mut reader,
        bincode::config::standard(),
    )?)
}

0 commit comments

Comments
 (0)