Hi,
According to https://github.com/openfheorg/openfhe-development/blob/main/docs/static_docs/Best_Performance.md, to take advantage of the AVX512_IFMA optimizations, the small moduli should be below 50 bits; if they are larger, slower instructions are used.
In this case, does "small moduli" mean the moduli q_i, whose size is set by dcrtBits?
Thank you in advance.
In response to your comments, I wrote the following code to measure the bootstrapping time for different modulus sizes.
I found that the time hardly changed even when the modulus sizes were set below 50 bits.
Naturally, I’m using HEXL on Intel(R) Xeon(R) 6740P.
Please let me know your thoughts.
#define PROFILE
#include "openfhe.h"
#include <chrono>
using namespace lbcrypto;
// Timestamps taken immediately before and after the timed region in main().
std::chrono::time_point<std::chrono::high_resolution_clock> start_layer;
std::chrono::time_point<std::chrono::high_resolution_clock> end_layer;
// Length of the timed region; a double-valued duration whose unit is one second.
std::chrono::duration<double, std::ratio<1>> dur_layer;
int main(int argc, char* argv[]) {
CCParams<CryptoContextCKKSRNS> parameters;
SecretKeyDist secretKeyDist = SPARSE_TERNARY;
parameters.SetSecretKeyDist(secretKeyDist);
parameters.SetSecurityLevel(HEStd_128_classic);
parameters.SetRingDim(1 << 16);
parameters.SetNumLargeDigits(3);
parameters.SetKeySwitchTechnique(HYBRID);
ScalingTechnique rescaleTech = FIXEDMANUAL;
usint dcrtBits = 48; //below 50bit
usint firstMod = 49; //below 50bit
parameters.SetScalingModSize(dcrtBits);
parameters.SetScalingTechnique(rescaleTech);
parameters.SetFirstModSize(firstMod);
std::vector<uint32_t> levelBudget = {3, 3};
std::vector<uint32_t> bsgsDim = {0, 0};
uint32_t approxBootstrapDepth = 9;
uint32_t levelsAvailableAfterBootstrap = 8;
usint depth = levelsAvailableAfterBootstrap + FHECKKSRNS::GetBootstrapDepth(approxBootstrapDepth, levelBudget, secretKeyDist);
parameters.SetMultiplicativeDepth(depth);
CryptoContext<DCRTPoly> cryptoContext = GenCryptoContext(parameters);
cryptoContext->Enable(PKE);
cryptoContext->Enable(KEYSWITCH);
cryptoContext->Enable(LEVELEDSHE);
cryptoContext->Enable(ADVANCEDSHE);
cryptoContext->Enable(FHE);
usint ringDim = cryptoContext->GetRingDimension();
uint32_t numSlots = ringDim/2;
cryptoContext->EvalBootstrapSetup(levelBudget, bsgsDim, numSlots);
auto keyPair = cryptoContext->KeyGen();
cryptoContext->EvalMultKeyGen(keyPair.secretKey);
cryptoContext->EvalBootstrapKeyGen(keyPair.secretKey, numSlots);
std::vector<double> x;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(0.0, 1.0);
for (size_t i = 0; i < numSlots; i++) {
x.push_back(dis(gen));
}
Plaintext ptxt = cryptoContext->MakeCKKSPackedPlaintext(x, 1, depth-1, nullptr, numSlots);
Ciphertext<DCRTPoly> ciph = cryptoContext->Encrypt(keyPair.publicKey, ptxt);
//Start time measurement
start_layer = std::chrono::high_resolution_clock::now();
for (int i=0; i<10; i++){
Ciphertext<DCRTPoly> tmp = cryptoContext->EvalBootstrap(ciph);
}
//Finish time measurement
end_layer = std::chrono::high_resolution_clock::now();
dur_layer = std::chrono::duration_cast<std::chrono::nanoseconds>(end_layer - start_layer);
//result
std::cout << "Time take:" << dur_layer.count() << "[s]" << std::endl;
}
dcrtBits = 48, firstMod = 49
Time take:75.0234[s]
dcrtBits = 51, firstMod = 60
Time take:80.8023[s]