Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_Keras.py File Reference

Detailed Description

View in nbviewer Open in SWAN
This macro provides a simple example of parsing a Keras .keras file into an RModel object and then generating the .hxx header file used for inference.

import contextlib
import warnings
import numpy as np
import ROOT
from tensorflow.keras.layers import Activation, Dense, Input, Softmax
from tensorflow.keras.models import Model
# Enable ROOT in batch mode (same effect as -nodraw)
ROOT.gROOT.SetBatch(True)
@contextlib.contextmanager
def expect_warning(category, message):
    """Silence a known third-party warning and raise if it stops firing.

    Notifies us to drop the workaround once the upstream library is fixed.

    Args:
        category: Warning class the expected warning must be (a subclass of).
        message: Substring that must occur in the expected warning's text.

    Raises:
        RuntimeError: If the managed block completes without emitting a
            warning that matches both ``category`` and ``message``.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record everything: without this, an "ignore"/"default" filter that
        # is active in the caller could hide the expected warning and make
        # the check below fail spuriously.
        warnings.simplefilter("always")
        yield

    seen = False
    for w in caught:
        if issubclass(w.category, category) and message in str(w.message):
            seen = True
        else:
            # Only the expected warning is silenced; hand every other
            # recorded warning back to the caller's warning machinery.
            warnings.warn_explicit(w.message, w.category, w.filename, w.lineno)
    if not seen:
        raise RuntimeError(
            f"Expected {category.__name__} containing {message!r} was not "
            "emitted. This tutorial's workaround can probably be removed."
        )
# -----------------------------------------------------------------------------
# Step 1: Create and train a simple Keras model
# -----------------------------------------------------------------------------
# Functional-API model: 4 input features with a fixed batch size of 2, three
# ReLU-activated hidden layers (32, 16, 8 units) and a 2-unit softmax output.
inputs = Input(shape=(4,), batch_size=2)
x = Dense(32)(inputs)
x = Activation("relu")(x)
x = Dense(16, activation="relu")(x)
x = Dense(8, activation="relu")(x)
x = Dense(2)(x)
output = Softmax()(x)
model = Model(inputs=inputs, outputs=output)

# Seeded generator so the toy training data is the same on every run.
randomGenerator = np.random.RandomState(0)
x_train = randomGenerator.rand(4, 4)
y_train = randomGenerator.rand(4, 2)

model.compile(loss="mse", optimizer="adam")
model.fit(x_train, y_train, epochs=3, batch_size=2)

# Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement
# the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a
# DeprecationWarning that we cannot fix from user code.
if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0):
    ctx = expect_warning(
        DeprecationWarning,
        "__array__ implementation doesn't accept a copy keyword",
    )
else:
    # Older NumPy does not emit the warning, so there is nothing to expect.
    ctx = contextlib.nullcontext()
with ctx:
    model.save("KerasModel.keras")
# -----------------------------------------------------------------------------
# Step 2: Use TMVA::SOFIE to parse the Keras model
# -----------------------------------------------------------------------------
# Parse the saved Keras model. Note that ``model`` is rebound here: from this
# point on it refers to the parsed SOFIE model, not to the Keras model.
model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse("KerasModel.keras")

# Generate the inference code and write it to disk, so that the
# "KerasModel.hxx" header included below actually exists.
model.Generate()
model.OutputGenerated()

# Print the generated code
print("\n**************************************************")
print(" Generated code")
print("**************************************************\n")
model.PrintGenerated()
print("**************************************************\n\n\n")

# Compile the generated code
ROOT.gInterpreter.Declare('#include "KerasModel.hxx"')
# -----------------------------------------------------------------------------
# Step 3: Run inference
# -----------------------------------------------------------------------------
# Instantiate the SOFIE session class from the generated header; its
# constructor reads the trained weights from the given file.
session = ROOT.TMVA_SOFIE_KerasModel.Session("KerasModel.dat")

# Input tensor (same shape as training input)
x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32)

# Run inference
y = session.infer(x)
print("Inference output:", y)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Epoch 1/3
␛[1m1/2␛[0m ␛[32m━━━━━━━━━━␛[0m␛[37m━━━━━━━━━━␛[0m ␛[1m0s␛[0m 655ms/step - loss: 0.0796␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈
␛[1m2/2␛[0m ␛[32m━━━━━━━━━━━━━━━━━━━━␛[0m␛[37m␛[0m ␛[1m1s␛[0m 11ms/step - loss: 0.1251
Epoch 2/3
␛[1m1/2␛[0m ␛[32m━━━━━━━━━━␛[0m␛[37m━━━━━━━━━━␛[0m ␛[1m0s␛[0m 11ms/step - loss: 0.0773␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈
␛[1m2/2␛[0m ␛[32m━━━━━━━━━━━━━━━━━━━━␛[0m␛[37m␛[0m ␛[1m0s␛[0m 10ms/step - loss: 0.1228
Epoch 3/3
␛[1m1/2␛[0m ␛[32m━━━━━━━━━━␛[0m␛[37m━━━━━━━━━━␛[0m ␛[1m0s␛[0m 11ms/step - loss: 0.1433␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈␈
␛[1m2/2␛[0m ␛[32m━━━━━━━━━━━━━━━━━━━━␛[0m␛[37m␛[0m ␛[1m0s␛[0m 10ms/step - loss: 0.1209
Model: "functional"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer) │ (2, 4) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense) │ (2, 32) │ 160 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ activation (Activation) │ (2, 32) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense) │ (2, 16) │ 528 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense) │ (2, 8) │ 136 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense) │ (2, 2) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (2, 2) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 2,528 (9.88 KB)
Trainable params: 842 (3.29 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 1,686 (6.59 KB)
PyKeras: parsing model KerasModel.keras
**************************************************
Generated code
**************************************************
//Code generated automatically by TMVA for Inference of Model file [KerasModel.keras] at [Tue Jun 23 02:03:02 202]
#ifndef ROOT_TMVA_SOFIE_KERASMODEL
#define ROOT_TMVA_SOFIE_KERASMODEL
#include <algorithm>
#include <cmath>
#include <vector>
#include "TMVA/SOFIE_common.hxx"
#include <fstream>
namespace TMVA_SOFIE_KerasModel{
namespace BLAS{
extern "C" void sgemm_(const char * transa, const char * transb, const int * m, const int * n, const int * k,
const float * alpha, const float * A, const int * lda, const float * B, const int * ldb,
const float * beta, float * C, const int * ldc);
extern "C" void sgemv_(const char * trans, const int * m, const int * n, const float * alpha, const float * A,
const int * lda, const float * X, const int * incx, const float * beta, const float * Y, const int * incy);
}//BLAS
struct Session;
inline void doInfer(Session const &session, float const* tensor_input_layer, float *tensor_keras_tensor_18 );
struct Session {
// initialized (weights and constant) tensors
std::vector<float> fTensor_dense_3kernel = std::vector<float>(16);
float * tensor_dense_3kernel = fTensor_dense_3kernel.data();
std::vector<float> fTensor_dense_2bias = std::vector<float>(8);
float * tensor_dense_2bias = fTensor_dense_2bias.data();
std::vector<float> fTensor_dense_3bias = std::vector<float>(2);
float * tensor_dense_3bias = fTensor_dense_3bias.data();
std::vector<float> fTensor_dense_2kernel = std::vector<float>(128);
float * tensor_dense_2kernel = fTensor_dense_2kernel.data();
std::vector<float> fTensor_dense_1bias = std::vector<float>(16);
float * tensor_dense_1bias = fTensor_dense_1bias.data();
std::vector<float> fTensor_dense_1kernel = std::vector<float>(512);
float * tensor_dense_1kernel = fTensor_dense_1kernel.data();
std::vector<float> fTensor_densebias = std::vector<float>(32);
float * tensor_densebias = fTensor_densebias.data();
std::vector<float> fTensor_densekernel = std::vector<float>(128);
float * tensor_densekernel = fTensor_densekernel.data();
//--- Allocating session memory pool to be used for allocating intermediate tensors
std::vector<char> fIntermediateMemoryPool = std::vector<char>(512);
// --- Positioning intermediate tensor memory --
// Allocating memory for intermediate tensor keras_tensor_8 with size 256 bytes
float* tensor_keras_tensor_8 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 0);
// Allocating memory for intermediate tensor keras_tensor_10 with size 256 bytes
float* tensor_keras_tensor_10 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 256);
// Allocating memory for intermediate tensor dense_1Dense with size 128 bytes
float* tensor_dense_1Dense = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 128);
// Allocating memory for intermediate tensor keras_tensor_12 with size 128 bytes
float* tensor_keras_tensor_12 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 0);
// Allocating memory for intermediate tensor dense_2Dense with size 64 bytes
float* tensor_dense_2Dense = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 448);
// Allocating memory for intermediate tensor keras_tensor_14 with size 64 bytes
float* tensor_keras_tensor_14 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 384);
// Allocating memory for intermediate tensor keras_tensor_16 with size 16 bytes
float* tensor_keras_tensor_16 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 368);
// Allocating memory for intermediate tensor keras_tensor_18 with size 16 bytes
float* tensor_keras_tensor_18 = reinterpret_cast<float*>(fIntermediateMemoryPool.data() + 352);
Session(std::string filename ="KerasModel.dat") {
//--- reading weights from file
std::ifstream f;
f.open(filename);
if (!f.is_open()) {
throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights");
}
using TMVA::Experimental::SOFIE::ReadTensorFromStream;
ReadTensorFromStream(f, tensor_dense_3kernel, "tensor_dense_3kernel", 16);
ReadTensorFromStream(f, tensor_dense_2bias, "tensor_dense_2bias", 8);
ReadTensorFromStream(f, tensor_dense_3bias, "tensor_dense_3bias", 2);
ReadTensorFromStream(f, tensor_dense_2kernel, "tensor_dense_2kernel", 128);
ReadTensorFromStream(f, tensor_dense_1bias, "tensor_dense_1bias", 16);
ReadTensorFromStream(f, tensor_dense_1kernel, "tensor_dense_1kernel", 512);
ReadTensorFromStream(f, tensor_densebias, "tensor_densebias", 32);
ReadTensorFromStream(f, tensor_densekernel, "tensor_densekernel", 128);
f.close();
}
std::vector<float> infer(float const* tensor_input_layer){
std::vector<float > output_tensor_keras_tensor_18(4);
doInfer(*this, tensor_input_layer, output_tensor_keras_tensor_18.data() );
return {output_tensor_keras_tensor_18};
}
}; // end of Session
// Input tensor dimensions
using TMVA::Experimental::SOFIE::SingleDim;
using TMVA::Experimental::SOFIE::TensorDims;
using TMVA::Experimental::SOFIE::makeDims;
constexpr std::array<SingleDim, 2> dim_input_layer{SingleDim{2}, SingleDim{4}};
constexpr std::array<TensorDims, 1> inputTensorDims{
makeDims(dim_input_layer)
};
constexpr bool hasDynamicInputTensors{false};
inline void doInfer(Session const &session, float const* tensor_input_layer, float *tensor_keras_tensor_18 ) {
auto &tensor_dense_1Dense = session.tensor_dense_1Dense;
auto &tensor_dense_1bias = session.tensor_dense_1bias;
auto &tensor_dense_1kernel = session.tensor_dense_1kernel;
auto &tensor_dense_2Dense = session.tensor_dense_2Dense;
auto &tensor_dense_2bias = session.tensor_dense_2bias;
auto &tensor_dense_2kernel = session.tensor_dense_2kernel;
auto &tensor_dense_3bias = session.tensor_dense_3bias;
auto &tensor_dense_3kernel = session.tensor_dense_3kernel;
auto &tensor_densebias = session.tensor_densebias;
auto &tensor_densekernel = session.tensor_densekernel;
auto &tensor_keras_tensor_10 = session.tensor_keras_tensor_10;
auto &tensor_keras_tensor_12 = session.tensor_keras_tensor_12;
auto &tensor_keras_tensor_14 = session.tensor_keras_tensor_14;
auto &tensor_keras_tensor_16 = session.tensor_keras_tensor_16;
auto &tensor_keras_tensor_8 = session.tensor_keras_tensor_8;
//--------- Gemm op_0 { 2 , 4 } * { 4 , 32 } -> { 2 , 32 }
for (size_t j = 0; j < 2; j++) {
size_t y_index = 32 * j;
TMVA::Experimental::SOFIE::Copy(tensor_keras_tensor_8 + y_index, tensor_densebias, 32);
}
TMVA::Experimental::SOFIE::Gemm_Call(tensor_keras_tensor_8, false, false, 32, 2, 4, 1, tensor_densekernel, tensor_input_layer, 1,nullptr);
//------ RELU
for (int id = 0; id < 64 ; id++){
tensor_keras_tensor_10[id] = ((tensor_keras_tensor_8[id] > 0 )? tensor_keras_tensor_8[id] : 0);
}
//--------- Gemm op_2 { 2 , 32 } * { 32 , 16 } -> { 2 , 16 }
for (size_t j = 0; j < 2; j++) {
size_t y_index = 16 * j;
TMVA::Experimental::SOFIE::Copy(tensor_dense_1Dense + y_index, tensor_dense_1bias, 16);
}
TMVA::Experimental::SOFIE::Gemm_Call(tensor_dense_1Dense, false, false, 16, 2, 32, 1, tensor_dense_1kernel, tensor_keras_tensor_10, 1,nullptr);
//------ RELU
for (int id = 0; id < 32 ; id++){
tensor_keras_tensor_12[id] = ((tensor_dense_1Dense[id] > 0 )? tensor_dense_1Dense[id] : 0);
}
//--------- Gemm op_4 { 2 , 16 } * { 16 , 8 } -> { 2 , 8 }
for (size_t j = 0; j < 2; j++) {
size_t y_index = 8 * j;
TMVA::Experimental::SOFIE::Copy(tensor_dense_2Dense + y_index, tensor_dense_2bias, 8);
}
TMVA::Experimental::SOFIE::Gemm_Call(tensor_dense_2Dense, false, false, 8, 2, 16, 1, tensor_dense_2kernel, tensor_keras_tensor_12, 1,nullptr);
//------ RELU
for (int id = 0; id < 16 ; id++){
tensor_keras_tensor_14[id] = ((tensor_dense_2Dense[id] > 0 )? tensor_dense_2Dense[id] : 0);
}
//--------- Gemm op_6 { 2 , 8 } * { 8 , 2 } -> { 2 , 2 }
for (size_t j = 0; j < 2; j++) {
size_t y_index = 2 * j;
TMVA::Experimental::SOFIE::Copy(tensor_keras_tensor_16 + y_index, tensor_dense_3bias, 2);
}
TMVA::Experimental::SOFIE::Gemm_Call(tensor_keras_tensor_16, false, false, 2, 2, 8, 1, tensor_dense_3kernel, tensor_keras_tensor_14, 1,nullptr);
//------ SOFTMAX - 2 4 1
for (int i = 0; i < 2; ++i) {
size_t offset = i * 2;
float const * x_ptr = &tensor_keras_tensor_16[offset];
float * y_ptr = &tensor_keras_tensor_18[offset];
float vmax = x_ptr[0];
for (int j = 1; j < 2; ++j) {
if (x_ptr[j] > vmax) vmax = x_ptr[j];
}
float sum = 0.0;
for (int j = 0; j < 2; ++j) {
y_ptr[j] = std::exp(x_ptr[j] - vmax);
sum += y_ptr[j];
}
float inv_sum = 1.0f / sum;
for (int j = 0; j < 2; ++j) {
y_ptr[j] *= inv_sum;
}
}
}
} //TMVA_SOFIE_KerasModel
#endif // ROOT_TMVA_SOFIE_KERASMODEL
**************************************************
Inference output: { 0.498125f, 0.501875f, 0.501883f, 0.498117f }
Author
Sanjiban Sengupta and Lorenzo Moneta

Definition in file TMVA_SOFIE_Keras.py.