Files
proxmark3/tools/cryptorf/sma_multi.cpp

1127 lines
35 KiB
C++

/*
*
* SecureMemory recovery Multithread
*
* Copyright (C) 2010, Flavio D. Garcia, Peter van Rossum, Roel Verdult
* and Ronny Wichers Schreur. Radboud University Nijmegen
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Modified by Iceman, 2020
*/
#include "defines.h"
#include "cryptolib.h"
#include "util.h"
#include <stdio.h>
#include <time.h>
#include <iostream>
#include <vector>
#include <map>
#include <algorithm> // sort, max_element, random_shuffle, remove_if, lower_bound
#include <functional> // greater, bind2nd
#include <thread> // std::thread
#include <atomic>
#include <mutex>
using namespace std;
#ifdef _MSC_VER
#define inline __inline
#endif
/*
>./sm 4f794a463ff81d81 ffffffffffffffff 1234567812345678
SecureMemory simulator - (c) Radboud University Nijmegen
Authenticate
Gc: 4f 79 4a 46 3f f8 1d 81
Ci: ff ff ff ff ff ff ff ff
Q: 12 34 56 78 12 34 56 78
Ch: 88 c9 d4 46 6a 50 1a 87
Ci+1: de c2 ee 1b 1c 92 76 e9
Ks: de 88 c2 c9 ee d4 1b 46 1c 6a 92 50 76 1a e9 87
left: 1ddeac626
right: 19aba45
left-candidates bins:
004df8a64 (74)
0059ff7d5 (81)
00d2ff4ed (80)
032df8b12 (78)
0337b8b7d (87)
036f7b607 (77)
03a6f882a (79)
03b2ff59b (76)
04445c715 (74)
0452175be (80)
0b29f2a5b (78)
0f6c834fb (76)
0f78aac5b (75)
0f79c8d49 (78)
109691f61 (70)
159d1687e (86)
176e73456 (77)
1ddeac626 (92)
1facee6e5 (78)
2049ed469 (80)
205078bba (74)
31c277406 (81)
31c2777e6 (81)
3770cdaf3 (74)
48916e84e (77)
4ba9b6520 (78)
4ba9b653f (78)
4c51c6463 (82)
4c9432733 (76)
4e3d88819 (81)
4e3d88bf9 (81)
51c8755b5 (76)
5b2aeb858 (76)
5fb612b96 (80)
60531191a (78)
6221539d9 (92)
68918cba9 (79)
6c9a11672 (78)
6f696e09e (70)
7086372b6 (78)
7bade8a41 (82)
7c90849f8 (77)
7cc847482 (87)
*/
// The 43 left-register candidates from the example run shown in the comment
// above ("left-candidates bins"), reordered so that the candidates with the
// highest bin count come first (92, 92, 87, 87, 86, ... down to 70).
// NOTE(review): no code in the visible part of this file reads this table —
// confirm whether it is still needed.
const uint64_t left_candidates[43] = {
0x6221539d9ull, 0x1ddeac626ull, 0x7cc847482ull, 0x0337b8b7dull,
0x159d1687eull, 0x7bade8a41ull, 0x4c51c6463ull, 0x4e3d88bf9ull,
0x4e3d88819ull, 0x31c2777e6ull, 0x31c277406ull, 0x0059ff7d5ull,
0x5fb612b96ull, 0x2049ed469ull, 0x0452175beull, 0x00d2ff4edull,
0x68918cba9ull, 0x03a6f882aull, 0x7086372b6ull, 0x6c9a11672ull,
0x60531191aull, 0x4ba9b653full, 0x4ba9b6520ull, 0x1facee6e5ull,
0x0f79c8d49ull, 0x0b29f2a5bull, 0x032df8b12ull, 0x7c90849f8ull,
0x48916e84eull, 0x176e73456ull, 0x036f7b607ull, 0x5b2aeb858ull,
0x51c8755b5ull, 0x4c9432733ull, 0x0f6c834fbull, 0x03b2ff59bull,
0x0f78aac5bull, 0x3770cdaf3ull, 0x205078bbaull, 0x04445c715ull,
0x004df8a64ull, 0x6f696e09eull, 0x109691f61ull
};
// Candidate cipher state used while searching for and rolling back register
// contents.
typedef struct {
uint64_t l; // left register; the search code enumerates it below 0x800000000 (35 bits)
uint64_t m; // middle register; only print_cs() touches it in the visible code (7 fields of 7 bits)
uint64_t r; // right register; the search code enumerates it below 0x2000000 (25 bits)
// NOTE(review): b0, b1, b1l, b1r and b1s are not accessed by any code visible
// in this file — presumably kept for layout compatibility; confirm.
nibble b0;
nibble b1;
nibble b1l;
nibble b1r;
nibble b1s;
bool invalid; // set by previous_left()/previous_right() when the state has no predecessor
byte_t Gc[8]; // per-byte key (Gc) bits collected during the meet-in-the-middle search
}cs_t;
// Pointer alias for cs_t.
typedef cs_t* pcs;
// One precomputed register step, as stored in lookup_left / lookup_right.
typedef struct {
byte_t addition; // modular sum that gets shifted into the top of the register
byte_t out; // 4-bit output nibble produced by that step
} lookup_entry;
// Selects which register previous_all_input() rolls back: the left one (l) or
// the right one (r).
enum cipher_state_side {
CSS_LEFT,
CSS_RIGHT
};
void print_cs(const char* text,pcs s) {
int pos;
printf("%s",text);
for(pos=6;pos>=0;pos--)
printf(" %02x",(byte_t)(s->l>>(pos*5))&0x1f);
printf(" |");
for(pos=6;pos>=0;pos--)
printf(" %02x",(byte_t)(s->m>>(pos*7))&0x7f);
printf(" |");
for(pos=4;pos>=0;pos--)
printf(" %02x",(byte_t)(s->r>>(pos*5))&0x1f);
printf("\n");
}
// Modified modulo used by the cipher: values below m are returned unchanged,
// anything else is reduced modulo m, except that a remainder of zero is
// reported as m itself (so mod(m, m) == m rather than 0).
static inline byte_t mod(byte_t a, byte_t m) {
    if (a < m) {
        return a;
    }
    const byte_t remainder = a % m;
    if (remainder == 0) {
        return m;
    }
    return remainder;
}
/*
static inline byte_t bit_rotate_l(byte_t a, byte_t n_bits) {
// Rotate value a with the length of n_bits only 1 time
byte_t mask = (1 << n_bits) - 1;
return ((a << 1) | (a >> (n_bits - 1))) & mask;
}
static inline byte_t bit_rotate_r(byte_t a, byte_t n_bits) {
return ((a >> 1) | ((a&1) << (n_bits - 1)));
}
*/
// Macro versions of the commented-out bit_rotate_l()/bit_rotate_r() helpers
// above, fixed to a width of 5 bits.
// Mask selecting the low 5 bits.
#define BIT_ROL_MASK ((1 << 5) - 1)
// Rotate the 5-bit value `a` left by one position.
#define BIT_ROL(a) ((((a) << 1) | ((a) >> 4)) & BIT_ROL_MASK)
// Rotate the 5-bit value `a` right by one position. No mask is applied, so `a`
// has to fit in 5 bits already. Note that `a` is evaluated twice.
#define BIT_ROR(a) (((a) >> 1) | (((a) & 1) << 4))
// Rollback tables, indexed by (b << 5) | bx with two 5-bit values; filled by
// init_lookup_left_substraction() / init_lookup_right_substraction().
static byte_t lookup_left_substraction[0x400];
static byte_t lookup_right_subtraction[0x400];
// Forward-step tables. lookup_left and left_addition are indexed with
// (left & 0xf801f), lookup_right with (right & 0x7c1f); filled by
// init_lookup_left() / init_lookup_right().
static lookup_entry lookup_left[0x100000];
static lookup_entry lookup_right[0x8000];
static byte_t left_addition[0x100000];
static inline void init_lookup_left() {
byte_t b3, b6, temp;
int i, index;
for (i = 0; i <0x400; i++){
b6 = i & 0x1f;
b3 = (i >> 5) & 0x1f;
index = (b3 << 15) | b6;
// b6 = bit_rotate_l(b6, 5);
b6 = BIT_ROL(b6);
temp = mod(b3 + b6,0x1f);
left_addition[index] = temp;
lookup_left[index].addition = temp;
lookup_left[index].out = ((temp^b3) & 0x0f);
}
}
static inline void init_lookup_right() {
byte_t b16, b18, temp;
int i, index;
for(i = 0; i <0x400; i++) {
b18 = i & 0x1f;
b16 = (i >> 5) & 0x1f;
index = (b16 << 10) | b18;
temp = mod(b18 + b16,0x1f);
lookup_right[index].addition = temp;
lookup_right[index].out = ((temp^b16) & 0x0f);
}
}
static void init_lookup_left_substraction() {
for(int index = 0; index < 0x400 ; index++) {
byte_t b3 = (index >> 5 & 0x1f);
byte_t bx = (index & 0x1f);
//lookup_left_substraction[index] = bit_rotate_r(mod((bx+0x1f)-b3,0x1f),5);
lookup_left_substraction[index] = BIT_ROR( mod((bx + 0x1F) - b3, 0x1F));
}
}
static void init_lookup_right_substraction() {
for(int index = 0; index < 0x400 ; index++) {
int b16 = (index >>5);
byte_t bx = (index & 0x1f);
lookup_right_subtraction[index] = mod((bx + 0x1F) - b16, 0x1F);
}
}
// Roll the left register of every candidate back by one step for input `in`.
//   in               - input byte; only its low 5 bits are mixed into the register
//   candidate_states - states to roll back in place. A state without a
//                      predecessor gets `invalid` set. When the recovered b6
//                      is 0x1f, a second state with b6 == 0 is appended.
// Only the states present on entry are processed; appended twins are not
// visited again in this call.
static inline void previous_left(byte_t in, vector<cs_t> *candidate_states) {
    const uint64_t keep_mask = 0x7ffffffe0ull;              // 35-bit register with the low 5 bits cleared
    const uint64_t in_bits = ((uint64_t)in & 0x1f) << 20;
    const size_t initial_count = candidate_states->size();

    for (size_t idx = 0; idx < initial_count; ++idx) {
        // Re-fetch the element each round: push_back below may reallocate.
        cs_t &cur = (*candidate_states)[idx];

        const byte_t bx = (byte_t)((cur.l >> 30) & 0x1f);
        const unsigned b3 = (unsigned)(cur.l >> 5) & 0x3e0;
        cur.l <<= 5;

        if (bx == 0) {
            if (b3 != 0) {
                // A zero sum with a non-zero b3 cannot have been produced
                cur.invalid = true;
            } else {
                // Only b6 == 0 has to be considered
                cur.l = (cur.l & keep_mask) ^ in_bits;
            }
            continue;
        }

        const byte_t b6 = lookup_left_substraction[b3 | bx];
        cur.l = ((cur.l & keep_mask) | b6) ^ in_bits;

        // b6 == 0x1f has a second preimage with b6 == 0
        if (b6 == 0x1f) {
            cs_t twin = cur;
            twin.l &= keep_mask;
            candidate_states->push_back(twin);
        }
    }
}
// Roll the right register of every candidate back by one step for input `in`.
//   in               - input byte; only its top 5 bits (0xf8) are mixed into the register
//   candidate_states - states to roll back in place. A state without a
//                      predecessor gets `invalid` set. When the recovered b18
//                      is 0x1f, a second state with b18 == 0 is appended.
// Only the states present on entry are processed; appended twins are not
// visited again in this call.
static inline void previous_right(byte_t in, vector<cs_t> *candidate_states) {
    const uint64_t keep_mask = 0x1ffffe0ull;                // 25-bit register with the low 5 bits cleared
    const uint64_t in_bits = ((uint64_t)in & 0xf8) << 12;
    const size_t initial_count = candidate_states->size();

    for (size_t idx = 0; idx < initial_count; ++idx) {
        // Re-fetch the element each round: push_back below may reallocate.
        cs_t &cur = (*candidate_states)[idx];

        const byte_t bx = (byte_t)((cur.r >> 20) & 0x1f);
        const unsigned b16 = (unsigned)(cur.r & 0x3e0);
        cur.r <<= 5;

        if (bx == 0) {
            if (b16 != 0) {
                // A zero sum with a non-zero b16 cannot have been produced
                cur.invalid = true;
            } else {
                // Only b18 == 0 has to be considered
                cur.r = (cur.r & keep_mask) ^ in_bits;
            }
            continue;
        }

        const byte_t b18 = lookup_right_subtraction[b16 | bx];
        cur.r = ((cur.r & keep_mask) | b18) ^ in_bits;

        // b18 == 0x1f has a second preimage with b18 == 0
        if (b18 == 0x1f) {
            cs_t twin = cur;
            twin.r &= keep_mask;
            candidate_states->push_back(twin);
        }
    }
}
// Advance the left register by one step using the precomputed table.
//   in   - input byte; its low 5 bits are xored into bits 20..24 first
//   left - register to update in place
// Returns the 4-bit output nibble of the step.
static inline byte_t next_left_fast(byte_t in, uint64_t* left) {
    uint64_t reg = *left;
    if (in != 0) {
        reg ^= (uint64_t)((in & 0x1f) << 20);
    }
    const lookup_entry& step = lookup_left[reg & 0xf801f];
    *left = (reg >> 5) | ((uint64_t)step.addition << 30);
    return step.out;
}
// Advance the left register by four input-less steps and return one byte:
// the nibble of the second step in the high half, the nibble of the fourth
// step in the low half. The outputs of steps one and three are discarded.
static inline byte_t next_left_ksbyte(uint64_t* left) {
    uint64_t reg = *left;
    byte_t produced = 0;

    for (int nib = 0; nib < 2; ++nib) {
        // Step whose output is not used: only the feedback value is needed
        reg = (reg >> 5) | ((uint64_t)left_addition[reg & 0xf801f] << 30);

        // Step whose output nibble is collected
        const lookup_entry& step = lookup_left[reg & 0xf801f];
        reg = (reg >> 5) | ((uint64_t)step.addition << 30);
        produced = (byte_t)((produced << 4) | step.out);
    }

    *left = reg;
    return produced;
}
// Advance the right register by one step using the precomputed table.
//   in    - input byte; its top 5 bits (0xf8) are xored into bits 15..19 first
//   right - register to update in place
// Returns the 4-bit output nibble of the step.
static inline byte_t next_right_fast(byte_t in, uint64_t* right) {
    uint64_t reg = *right;
    if (in != 0) {
        reg ^= (uint64_t)((in & 0xf8) << 12);
    }
    const lookup_entry& step = lookup_right[reg & 0x7c1f];
    *right = (reg >> 5) | (uint64_t)(step.addition << 20);
    return step.out;
}
static inline void sm_left_mask(const byte_t* ks, byte_t* mask, uint64_t rstate) {
for (uint8_t pos = 0; pos < 16; pos++) {
next_right_fast(0,&rstate);
byte_t bt = next_right_fast(0,&rstate) << 4;
next_right_fast(0,&rstate);
bt |= next_right_fast(0,&rstate);
// xor the bits with the keystream and count the "correct" bits
bt ^= ks[pos];
// Save the mask for the left produced bits
mask[pos] = bt;
}
}
// Set by ice_compare() as soon as one candidate key reproduces Ch and Ci+1;
// the other compare workers poll it and stop.
std::atomic<bool> key_found{0};
// The key stored by ice_compare(); meaningful once key_found is true.
std::atomic<uint64_t> key{0};
// Highest number of matching keystream bits seen by the right-state search.
std::atomic<size_t> topbits{0};
// Guards console output and the shared result map / mask written by the
// worker threads.
std::mutex g_ice_mtx;
// Number of worker threads, read by main().
// NOTE(review): std::thread::hardware_concurrency() is allowed to return 0
// when the value cannot be determined.
uint32_t g_num_cpus = std::thread::hardware_concurrency();
// Worker for the multithreaded right-register search.
//   offset     - first register value this worker tests
//   skips      - stride between tested values (the number of workers); must
//                be non-zero or the loop never advances
//   ks         - 16 keystream bytes
//   bincstates - shared result map; key is (matching bits << 56) | state so
//                that iteration order follows the bit count. Only states
//                with at least 90 matching bits are stored.
//   mask       - shared 16-byte buffer receiving the xor difference of the
//                best state seen so far (by any worker)
// Shared data is only touched while holding g_ice_mtx.
static void ice_sm_right_thread(
    uint8_t offset,
    uint8_t skips,
    const byte_t* ks,
    map<uint64_t,uint64_t>* bincstates,
    byte_t* mask
) {
    byte_t tmp_mask[16];

    for (uint64_t counter = offset; counter < 0x2000000; counter += skips) {
        // Number of keystream bits this state reproduces
        size_t bits = 0;
        // Copy the state we are going to test
        uint64_t rstate = counter;

        for (uint8_t pos = 0; pos < 16; pos++) {
            next_right_fast(0, &rstate);
            byte_t bt = next_right_fast(0, &rstate) << 4;
            next_right_fast(0, &rstate);
            bt |= next_right_fast(0, &rstate);

            // xor with the keystream: a zero bit means the bit was correct
            bt ^= ks[pos];
            // Save the mask for the left produced bits
            tmp_mask[pos] = bt;

            for (uint8_t bit = 0; bit < 8; bit++) {
                if (((bt >> bit) & 0x01) == 0) bits++;
            }
        }

        // topbits only ever grows, so a cheap unlocked pre-check can skip the
        // mutex for the overwhelming majority of states; the decision itself
        // is re-made under the lock.
        if (bits > topbits.load(std::memory_order_relaxed)) {
            std::lock_guard<std::mutex> guard(g_ice_mtx);
            if (bits > topbits.load(std::memory_order_relaxed)) {
                topbits = bits;
                // Copy the winning mask
                memcpy(mask, tmp_mask, 16);
            }
        }

        // Ignore states under 90
        if (bits >= 90) {
            // Make sure the bits are used for ordering
            std::lock_guard<std::mutex> guard(g_ice_mtx);
            (*bincstates)[(((uint64_t)bits) << 56) | counter] = counter;
        }

        if ((counter & 0xfffff) == 0) {
            std::lock_guard<std::mutex> guard(g_ice_mtx);
            printf(".");
            fflush(stdout);
        }
    }
}
static uint32_t ice_sm_right(const byte_t* ks, byte_t* mask, vector<uint64_t>* pcrstates) {
uint32_t g_num_cpus = std::thread::hardware_concurrency();
map<uint64_t,uint64_t> bincstates;
topbits = ATOMIC_VAR_INIT(0);
std::vector<std::thread> threads(g_num_cpus);
for (uint8_t m = 0; m < g_num_cpus; m++) {
threads[m] = std::thread( ice_sm_right_thread, m, g_num_cpus, ks, &bincstates, mask);
}
for (auto& t : threads) {
t.join();
}
printf("\n");
// Clear the candidate state vector
pcrstates->clear();
// Copy the order the states from lowest-bin to highest-bin
map<uint64_t,uint64_t>::iterator it;
for (it = bincstates.begin(); it != bincstates.end(); ++it) {
pcrstates->push_back(it->second);
}
// Reverse the vector order (so the higest bin comes first)
reverse(pcrstates->begin(), pcrstates->end());
return topbits;
}
// Worker for the multithreaded left-register search.
//   offset     - first register value this worker tests
//   skips      - stride between tested values (the number of workers); must
//                be non-zero or the loop never advances
//   ks         - 16 keystream bytes
//   bincstates - shared result map; key is (matching bits << 56) | state so
//                that iteration order follows the bit count
//   mask       - 16 bytes (read only here); a state is rejected as soon as a
//                produced byte differs from ks in a bit that is set in mask
// Shared data and the console are only touched while holding g_ice_mtx.
static void ice_sm_left_thread(
    uint8_t offset,
    uint8_t skips,
    const byte_t* ks,
    map<uint64_t, cs_t>* bincstates,
    byte_t* mask
) {
    size_t pos, bits;
    byte_t correct_bits[16];
    byte_t bt;
    lookup_entry* lookup;

    // Reset and initialize the cryptostate template stored for each hit
    cs_t state;
    memset(&state, 0x00, sizeof(cs_t));
    state.invalid = false;

    for (uint64_t counter = offset; counter < 0x800000000ull; counter += skips) {
        uint64_t lstate = counter;

        for (pos = 0; pos < 16; pos++) {
            // Four steps per keystream byte; only steps two and four yield output
            lstate = (((lstate) >> 5) | ((uint64_t)left_addition[((lstate) & 0xf801f)] << 30));
            lookup = &(lookup_left[((lstate) & 0xf801f)]);
            lstate = (((lstate) >> 5) | ((uint64_t)lookup->addition << 30));
            bt = lookup->out << 4;
            lstate = (((lstate) >> 5) | ((uint64_t)left_addition[((lstate) & 0xf801f)] << 30));
            lookup = &(lookup_left[((lstate) & 0xf801f)]);
            lstate = (((lstate) >> 5) | ((uint64_t)lookup->addition << 30));
            bt |= lookup->out;

            // xor the bits with the keystream: zero bits are "correct" bits
            bt ^= ks[pos];

            // When the REQUIRED bits are NOT xored away (=zero), ignore this wrong state
            if ((bt & mask[pos]) != 0) break;

            // Save the correct bits for statistical information
            correct_bits[pos] = bt;
        }

        // If we have parsed all 16 bytes of keystream, we have a valid CANDIDATE!
        if (pos == 16) {
            // Count the total correct (zero) bits
            bits = 0;
            for (pos = 0; pos < 16; pos++) {
                bt = correct_bits[pos];
                for (uint8_t bit = 0; bit < 8; bit++) {
                    if (((bt >> bit) & 0x01) == 0) bits++;
                }
            }

            state.l = counter;

            // Make sure the bits are used for ordering
            std::lock_guard<std::mutex> guard(g_ice_mtx);
            printf(".");
            fflush(stdout);
            (*bincstates)[(((uint64_t)bits) << 56) | counter] = state;
        }

        if ((counter & 0xffffffffull) == 0) {
            std::lock_guard<std::mutex> guard(g_ice_mtx);
            printf("%02.1f%%.", ((float)100/8) * (counter >> 32));
            fflush(stdout);
        }
    }
}
static void ice_sm_left(const byte_t* ks, byte_t* mask, vector<cs_t>* pcstates) {
uint32_t g_num_cpus = std::thread::hardware_concurrency();
map<uint64_t, cs_t> bincstates;
std::vector<std::thread> threads(g_num_cpus);
for (uint8_t m = 0; m < g_num_cpus; m++) {
threads[m] = std::thread( ice_sm_left_thread, m, g_num_cpus, ks, &bincstates, mask);
}
for (auto& t : threads) {
t.join();
}
printf("100%%\n");
// Clear the candidate state vector
pcstates->clear();
// Copy the order the states from lowest-bin to highest-bin
map<uint64_t, cs_t>::iterator it;
for(it = bincstates.begin(); it != bincstates.end(); ++it) {
pcstates->push_back(it->second);
}
// Reverse the vector order (so the higest bin comes first)
reverse(pcstates->begin(), pcstates->end());
}
static inline uint32_t sm_right(const byte_t* ks, byte_t* mask, vector<uint64_t>* pcrstates) {
byte_t tmp_mask[16];
size_t pos, bits, bit, topbits;
map<uint64_t,uint64_t> bincstates;
map<uint64_t,uint64_t>::iterator it;
byte_t bt;
topbits = 0;
for (uint64_t counter = 0; counter < 0x2000000; counter++) {
// Reset the current bitcount of correct bits
bits = 0;
// Copy the state we are going to test
uint64_t rstate = counter;
for (pos = 0; pos < 16; pos++) {
next_right_fast(0, &rstate);
bt = next_right_fast(0, &rstate) << 4;
next_right_fast(0, &rstate);
bt |= next_right_fast(0, &rstate);
// xor the bits with the keystream and count the "correct" bits
bt ^= ks[pos];
// Save the mask for the left produced bits
tmp_mask[pos] = bt;
for (bit = 0; bit < 8; bit++) {
// When the bit is xored away (=zero), it was the same, so correct ;)
if ((bt & 0x01) == 0) bits++;
bt >>= 1;
}
}
if (bits > topbits) {
topbits = bits;
// Copy the winning mask
memcpy(mask, tmp_mask, 16);
}
// Ignore states under 90
if (bits >= 90) {
// Make sure the bits are used for ordering
bincstates[(((uint64_t)bits) << 56) | counter] = counter;
}
if ((counter & 0xfffff) == 0) {
printf(".");
fflush(stdout);
}
}
printf("\n");
// Clear the candidate state vector
pcrstates->clear();
// Copy the order the states from lowest-bin to highest-bin
for (it = bincstates.begin(); it != bincstates.end(); ++it) {
pcrstates->push_back(it->second);
}
// Reverse the vector order (so the higest bin comes first)
reverse(pcrstates->begin(), pcrstates->end());
return topbits;
}
// Roll every candidate back one step for each of the 32 possible 5-bit
// inputs (0x00..0x1f, zero included) and keep the states that stay valid.
//   pcstates      - in: candidates to roll back; out: all valid predecessors
//   gc_byte_index - index into Gc[] where the guessed input is recorded
//   css           - CSS_RIGHT rolls back r (the guess is placed in the top
//                   5 bits of the input byte), otherwise l is rolled back
//                   (the guess is placed in the low 5 bits)
static inline void previous_all_input(vector<cs_t> *pcstates, uint32_t gc_byte_index, cipher_state_side css) {
    vector<cs_t> survivors;

    for (byte_t guess = 0; guess < 0x20; ++guess) {
        // Every guess starts from the candidates that were supplied
        vector<cs_t> trial(*pcstates);

        const byte_t in = (css == CSS_RIGHT) ? (byte_t)(guess << 3) : guess;
        if (css == CSS_RIGHT) {
            previous_right(in, &trial);
        } else {
            previous_left(in, &trial);
        }

        // Keep the valid states and remember which input led to them
        for (size_t idx = 0; idx < trial.size(); ++idx) {
            if (trial[idx].invalid) continue;
            trial[idx].Gc[gc_byte_index] = in;
            survivors.push_back(trial[idx]);
        }
    }

    // Hand the predecessors back to the caller
    pcstates->swap(survivors);
}
static inline void search_gc_candidates_right(const uint64_t rstate_before_gc, const uint64_t rstate_after_gc, const byte_t* Q, vector<cs_t>* pcstates) {
vector<cs_t>::iterator it;
vector<cs_t> csl_cand;
map<uint64_t,uint64_t> matchbox;
map<uint64_t,uint64_t>::iterator itmatch;
uint64_t rstate;
size_t counter;
cs_t state;
// Generate 2^20 different (5 bits) values for the first 4 Gc bytes (0,1,2,3)
for (counter=0; counter<0x100000; counter++) {
rstate = rstate_before_gc;
next_right_fast((counter >> 12) & 0xf8,&rstate);
next_right_fast((counter >> 7) & 0xf8,&rstate);
next_right_fast(Q[4],&rstate);
next_right_fast((counter >> 2) & 0xf8,&rstate);
next_right_fast((counter << 3) & 0xf8,&rstate);
next_right_fast(Q[5],&rstate);
matchbox[rstate] = counter;
}
// Reset and initialize the cryptostate and vecctor
memset(&state,0x00,sizeof(cs_t));
state.invalid = false;
state.r = rstate_after_gc;
csl_cand.clear();
csl_cand.push_back(state);
// Generate 2^20(+splitting) different (5 bits) values for the last 4 Gc bytes (4,5,6,7)
previous_right(Q[7],&csl_cand);
previous_all_input(&csl_cand,7,CSS_RIGHT);
previous_all_input(&csl_cand,6,CSS_RIGHT);
previous_right(Q[6],&csl_cand);
previous_all_input(&csl_cand,5,CSS_RIGHT);
previous_all_input(&csl_cand,4,CSS_RIGHT);
pcstates->clear();
// Take the intersection of the corresponding states ~2^15 values (40-25 = 15 bits)
for (it=csl_cand.begin();it!=csl_cand.end();++it) {
itmatch = matchbox.find(it->r);
if (itmatch != matchbox.end()) {
it->Gc[0] = (itmatch->second >> 12) & 0xf8;
it->Gc[1] = (itmatch->second >> 7) & 0xf8;
it->Gc[2] = (itmatch->second >> 2) & 0xf8;
it->Gc[3] = (itmatch->second << 3) & 0xf8;
pcstates->push_back(*it);
}
}
}
static inline void sm_left(const byte_t* ks, byte_t* mask, vector<cs_t>* pcstates) {
map<uint64_t, cs_t> bincstates;
map<uint64_t, cs_t>::iterator it;
uint64_t counter, lstate;
size_t pos, bits;
byte_t correct_bits[16];
byte_t bt;
cs_t state;
lookup_entry* lookup;
// Reset and initialize the cryptostate and vecctor
memset(&state, 0x00, sizeof(cs_t));
state.invalid = false;
for (counter = 0; counter < 0x800000000ull; counter++) {
lstate = counter;
for (pos = 0; pos < 16; pos++) {
lstate = (((lstate) >> 5)| ((uint64_t)left_addition[((lstate) & 0xf801f)] << 30));
lookup = &(lookup_left[((lstate) & 0xf801f)]);
lstate = (((lstate) >> 5)| ((uint64_t)lookup->addition << 30));
bt = lookup->out << 4;
lstate = (((lstate) >> 5)| ((uint64_t)left_addition[((lstate) & 0xf801f)] << 30));
lookup = &(lookup_left[((lstate) & 0xf801f)]);
lstate = (((lstate) >> 5)| ((uint64_t)lookup->addition << 30));
bt |= lookup->out;
// xor the bits with the keystream and count the "correct" bits
bt ^= ks[pos];
// When the REQUIRED bits are NOT xored away (=zero), ignore this wrong state
if ((bt & mask[pos]) != 0) break;
// Save the correct bits for statistical information
correct_bits[pos] = bt;
}
// If we have parsed all 16 bytes of keystream, we have a valid CANDIDATE!
if (pos == 16) {
// Count the total correct bits
bits = 0;
for (pos = 0; pos < 16; pos++) {
// Get the next byte-value with correct bits
bt = correct_bits[pos];
// Count all the (correct) bits
// When the bit is xored away (=zero), it was the same, so correct ;)
if ((bt & 0x01) == 0) bits++;
if (((bt >> 1) & 0x01) == 0) bits++;
if (((bt >> 2) & 0x01) == 0) bits++;
if (((bt >> 3) & 0x01) == 0) bits++;
if (((bt >> 4) & 0x01) == 0) bits++;
if (((bt >> 5) & 0x01) == 0) bits++;
if (((bt >> 6) & 0x01) == 0) bits++;
if (((bt >> 7) & 0x01) == 0) bits++;
}
// Print the left candidate
// printf("%09llx (%d)\n",counter,bits);
printf(".");
fflush(stdout);
state.l = counter;
// Make sure the bits are used for ordering
bincstates[(((uint64_t)bits)<<56) | counter] = state;
}
if ((counter & 0xffffffffull) == 0) {
printf("%02.1f%%.", ((float)100/8)*(counter>>32));
fflush(stdout);
}
}
printf("100%%\n");
// Clear the candidate state vector
pcstates->clear();
// Copy the order the states from lowest-bin to highest-bin
for(it = bincstates.begin(); it != bincstates.end(); ++it) {
pcstates->push_back(it->second);
}
// Reverse the vector order (so the higest bin comes first)
reverse(pcstates->begin(), pcstates->end());
}
static inline void search_gc_candidates_left(const uint64_t lstate_before_gc, const byte_t* Q, vector<cs_t>* pcstates) {
vector<cs_t> csl_cand,csl_search;
vector<cs_t>::iterator itsearch,itcand;
map<uint64_t,uint64_t> matchbox;
map<uint64_t,uint64_t>::iterator itmatch;
uint64_t lstate;
size_t counter;
// Generate 2^20 different (5 bits) values for the first 4 Gc bytes (0,1,2,3)
for (counter=0; counter < 0x100000; counter++) {
lstate = lstate_before_gc;
next_left_fast((counter >> 15) & 0x1f, &lstate);
next_left_fast((counter >> 10) & 0x1f, &lstate);
next_left_fast(Q[4], &lstate);
next_left_fast((counter >> 5) & 0x1f, &lstate);
next_left_fast(counter & 0x1f, &lstate);
next_left_fast(Q[5], &lstate);
matchbox[lstate] = counter;
}
// Copy the input candidate states and clean the output vector
csl_cand = *pcstates;
pcstates->clear();
for (itcand = csl_cand.begin(); itcand != csl_cand.end(); ++itcand) {
csl_search.clear();
csl_search.push_back(*itcand);
// Generate 2^20(+splitting) different (5 bits) values for the last 4 Gc bytes (4,5,6,7)
previous_left(Q[7], &csl_search);
previous_all_input(&csl_search, 7, CSS_LEFT);
previous_all_input(&csl_search, 6, CSS_LEFT);
previous_left(Q[6], &csl_search);
previous_all_input(&csl_search, 5, CSS_LEFT);
previous_all_input(&csl_search, 4, CSS_LEFT);
// Take the intersection of the corresponding states ~2^15 values (40-25 = 15 bits)
for(itsearch = csl_search.begin(); itsearch != csl_search.end(); ++itsearch) {
itmatch = matchbox.find(itsearch->l);
if (itmatch != matchbox.end()){
itsearch->Gc[0] = (itmatch->second >> 15) & 0x1f;
itsearch->Gc[1] = (itmatch->second >> 10) & 0x1f;
itsearch->Gc[2] = (itmatch->second >> 5) & 0x1f;
itsearch->Gc[3] = itmatch->second & 0x1f;
pcstates->push_back(*itsearch);
}
}
printf(".");
fflush(stdout);
}
printf("\n");
}
// Combine left and right candidates into full 64-bit Gc candidates.
//   plcstates      - left candidates
//   prcstates      - right candidates
//   pgc_candidates - cleared, then filled with one key per compatible pair
// A pair is compatible when bits 0x18 agree in all eight Gc bytes of both
// states. The key is built from the per-byte OR of the two states, Gc[0]
// ending up in the most significant byte.
void combine_valid_left_right_states(vector<cs_t>* plcstates, vector<cs_t>* prcstates, vector<uint64_t>* pgc_candidates) {
    // The larger list drives the outer loop. References are enough here;
    // neither input is modified, so no copies are taken.
    const bool left_is_outer = plcstates->size() > prcstates->size();
    const vector<cs_t>& outer = left_is_outer ? *plcstates : *prcstates;
    const vector<cs_t>& inner = left_is_outer ? *prcstates : *plcstates;

    // size_t is not unsigned long on every platform, hence the casts for %lu
    printf("Outer " _YELLOW_("%lu")" , inner " _YELLOW_("%lu") "\n", (unsigned long)outer.size(), (unsigned long)inner.size());

    // Clean up the candidate list
    pgc_candidates->clear();

    for (vector<cs_t>::const_iterator ito = outer.begin(); ito != outer.end(); ++ito) {
        for (vector<cs_t>::const_iterator iti = inner.begin(); iti != inner.end(); ++iti) {
            // Check for candidates that share the overlapping bits (8 x 2bits of Gc)
            bool valid = true;
            for (size_t pos = 0; pos < 8; pos++) {
                if ((ito->Gc[pos] & 0x18) != (iti->Gc[pos] & 0x18)) {
                    valid = false;
                    break;
                }
            }
            if (!valid) continue;

            uint64_t gc = 0;
            for (size_t pos = 0; pos < 8; pos++) {
                gc <<= 8;
                gc |= (ito->Gc[pos] | iti->Gc[pos]);
            }
            pgc_candidates->push_back(gc);
        }
    }

    printf("Found a total of " _YELLOW_("%llu")" combinations, ", ((unsigned long long)plcstates->size()) * prcstates->size());
    printf("but only " _GREEN_("%lu")" were valid!\n", (unsigned long)pgc_candidates->size());
}
static void ice_compare(
uint8_t offset,
uint8_t skips,
vector<uint64_t>* candidates,
crypto_state_t* ostate,
byte_t *Ci,
byte_t *Q,
byte_t *Ch,
byte_t *Ci_1
) {
byte_t Gc_chk[8];
byte_t Ch_chk[ 8];
byte_t Ci_1_chk[ 8];
for (std::size_t i = offset; i < candidates->size(); i += skips) {
if (key_found.load(std::memory_order_relaxed))
break;
uint64_t tkey = candidates->at(i);
num_to_bytes(tkey, 8, Gc_chk);
sm_auth(Gc_chk, Ci, Q, Ch_chk, Ci_1_chk, ostate);
if ((memcmp(Ch_chk, Ch, 8) == 0) && (memcmp(Ci_1_chk, Ci_1, 8) == 0)) {
key_found = true;
key = tkey;
break;
}
}
return;
}
int main(int argc, const char* argv[]) {
size_t pos;
crypto_state_t ostate;
uint64_t rstate_before_gc, rstate_after_gc;
uint64_t lstate_before_gc;
vector<uint64_t> rstates, lstates_after_gc, pgc_candidates;
vector<uint64_t>::iterator itrstates, itgc;
vector<cs_t> crstates;
vector<cs_t> clcandidates, clstates;
vector<cs_t>::iterator it;
uint32_t rbits;
// byte_t Gc[ 8] = {0x4f,0x79,0x4a,0x46,0x3f,0xf8,0x1d,0x81};
// byte_t Gc[ 8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
// byte_t Ci[ 8] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
// byte_t Q[ 8] = {0x12,0x34,0x56,0x78,0x12,0x34,0x56,0x78};
byte_t Gc[ 8];
byte_t Ci[ 8];
byte_t Q[ 8];
byte_t Ch[ 8];
byte_t Ci_1[ 8];
// byte_t ks[16] = {0xde,0x88,0xc2,0xc9,0xee,0xd4,0x1b,0x46,0x1c,0x6a,0x92,0x50,0x76,0x1a,0xe9,0x87};
// byte_t mask[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
// byte_t mask[16] = {0x04,0xb0,0xe1,0x10,0xc0,0x33,0x44,0x20,0x20,0x00,0x70,0x8c,0x22,0x04,0x10,0x80};
byte_t ks[16];
byte_t mask[16];
ui64 nCi; // Card random
ui64 nQ; // Reader random
ui64 nCh; // Reader challange
ui64 nCi_1; // Card anwser
if ((argc != 2) && (argc != 5)) {
printf("SecureMemory recovery - (c) Radboud University Nijmegen\n\n");
printf("syntax: sma_multi simulate\n");
printf(" sma_multi <Ci> <Q> <Ch> <Ci+1>\n\n");
return 1;
}
printf(_CYAN_("\nAuthentication info\n\n"));
// Check if this is a simulation
if (argc == 2) {
// Generate random values for the key and randoms
srand((uint32_t)time(null));
for (pos = 0; pos<8; pos++) {
Gc[pos] = rand();
Ci[pos] = rand();
Q[pos] = rand();
}
sm_auth(Gc,Ci,Q,Ch,Ci_1,&ostate);
printf(" Gc: "); print_bytes(Gc,8);
} else {
sscanf(argv[1],"%016llx",&nCi); num_to_bytes(nCi,8,Ci);
sscanf(argv[2],"%016llx",&nQ); num_to_bytes(nQ,8,Q);
sscanf(argv[3],"%016llx",&nCh); num_to_bytes(nCh,8,Ch);
sscanf(argv[4],"%016llx",&nCi_1); num_to_bytes(nCi_1,8,Ci_1);
printf(" Gc: unknown\n");
}
for (pos = 0; pos<8; pos++) {
ks[2*pos] = Ci_1[pos];
ks[(2*pos)+1] = Ch[pos];
}
printf(" Ci: "); print_bytes(Ci,8);
printf(" Q: "); print_bytes(Q,8);
printf(" Ch: "); print_bytes(Ch,8);
printf("Ci+1: "); print_bytes(Ci_1,8);
printf("\n");
printf(" Ks: "); print_bytes(ks,16);
printf("\n");
printf("\nMultithreaded, will use " _YELLOW_("%u") " threads\n", g_num_cpus);
printf("Initializing lookup tables for increasing cipher speed\n");
std::thread foo_left(init_lookup_left);
std::thread foo_right(init_lookup_right);
std::thread foo_leftsub(init_lookup_left_substraction);
std::thread foo_rightsub(init_lookup_right_substraction);
foo_left.join();
foo_right.join();
foo_leftsub.join();
foo_rightsub.join();
// Load in the ci (tag-nonce), together with the first half of Q (reader-nonce)
rstate_before_gc = 0;
lstate_before_gc = 0;
for (pos = 0; pos < 4; pos++) {
next_right_fast(Ci[2*pos ], &rstate_before_gc);
next_right_fast(Ci[2*pos+1], &rstate_before_gc);
next_right_fast(Q[pos], &rstate_before_gc);
next_left_fast(Ci[2*pos ], &lstate_before_gc);
next_left_fast(Ci[2*pos+1], &lstate_before_gc);
next_left_fast(Q[pos], &lstate_before_gc);
}
printf("Determing the right states that correspond to the keystream\n");
//rbits = sm_right(ks, mask, &rstates);
rbits = ice_sm_right(ks, mask, &rstates);
printf("Top-bin for the right state contains " _GREEN_("%d")" correct bits\n", rbits);
printf("Total count of right bins: " _YELLOW_("%lu") "\n", (unsigned long)rstates.size());
if (rbits < 96) {
printf(_RED_("\n WARNING!!! Better find another trace, the right top-bin is < 96 bits\n\n"));
}
for (itrstates = rstates.begin(); itrstates != rstates.end(); ++itrstates) {
rstate_after_gc = *itrstates;
sm_left_mask(ks, mask, rstate_after_gc);
printf("Using the state from the top-right bin: " _YELLOW_("0x%07llx")"\n", (unsigned long long)rstate_after_gc);
search_gc_candidates_right(rstate_before_gc, rstate_after_gc, Q, &crstates);
printf("Found " _YELLOW_("%lu")" right candidates using the meet-in-the-middle attack\n", crstates.size());
if (crstates.size() == 0) continue;
printf("Calculating left states using the (unknown bits) mask from the top-right state\n");
//sm_left(ks, mask, &clstates);
ice_sm_left(ks, mask, &clstates);
printf("Found a total of " _YELLOW_("%lu")" left cipher states, recovering left candidates...\n", clstates.size());
if (clstates.size() == 0) continue;
search_gc_candidates_left(lstate_before_gc, Q, &clstates);
printf("The meet-in-the-middle attack returned " _YELLOW_("%lu")" left cipher candidates\n", clstates.size());
if (clstates.size() == 0) continue;
printf("Combining left and right states, disposing invalid combinations\n");
combine_valid_left_right_states(&clstates, &crstates, &pgc_candidates);
printf("Filtering the correct one using the middle part\n");
key_found = ATOMIC_VAR_INIT(false);
std::vector<std::thread> threads(g_num_cpus);
for (uint8_t m = 0; m < g_num_cpus; m++) {
threads[m] = std::thread( ice_compare, m, g_num_cpus, &pgc_candidates, &ostate, ref(Ci), ref(Q), ref(Ch), ref(Ci_1) );
}
for (auto& t : threads) {
t.join();
}
if (key_found) {
printf("\nFound valid key: " _GREEN_("%016lX")"\n\n", key.load());
break;
}
printf(_RED_("\nCould not find key using this right cipher state.\n\n"));
}
return 0;
}