Mirror of https://gitlab.com/nkming2/nc-photos.git
Add tensorflow gpu library

parent f0ea012056
commit fa0d275fad

5 changed files with 168 additions and 0 deletions
@@ -0,0 +1,163 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Encapsulated compilation/runtime tradeoffs.
enum TfLiteGpuInferenceUsage {
  // The delegate will be used only once; therefore, bootstrap/init time
  // should be taken into account.
  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,

  // Prefer maximizing throughput. The same delegate will be used repeatedly
  // on multiple inputs.
  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
};

enum TfLiteGpuInferencePriority {
  // AUTO priority is needed when a single priority is the most important
  // factor. For example, priority1 = MIN_LATENCY would result in the
  // configuration that achieves maximum performance.
  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
};

// Used to toggle experimental flags used in the delegate. Note that this is a
// bitmask, so the values should be 1, 2, 4, 8, ...etc.
enum TfLiteGpuExperimentalFlags {
  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0,
  // Enables inference on quantized models with the delegate.
  // NOTE: This is enabled in TfLiteGpuDelegateOptionsV2Default.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 1 << 0,
  // Enforces execution with the provided backend.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 1 << 1,
  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 1 << 2,
  // Enables serialization of GPU kernels & model data. Speeds up
  // initialization at the cost of space on disk.
  // The delegate performs serialization the first time it is applied with a
  // new model or inference params. Later initializations are fast.
  // ModifyGraphWithDelegate will fail if data cannot be serialized.
  //
  // NOTE: The user also needs to set serialization_dir & model_token in
  // TfLiteGpuDelegateOptionsV2.
  // Currently works only if the CL backend is used.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION = 1 << 3,
};
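
// A minimal sketch (not part of the upstream header) of enabling
// serialization as described above; the cache directory and token below are
// hypothetical placeholders:
//
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.experimental_flags |=
//       TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
//   options.serialization_dir = "/data/user/0/com.example.app/cache";
//   options.model_token = "model_v1";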

// IMPORTANT: Always use the TfLiteGpuDelegateOptionsV2Default() method to
// create a new instance of TfLiteGpuDelegateOptionsV2; otherwise every newly
// added option may break inference.
typedef struct {
  // When set to zero, computations are carried out in maximal possible
  // precision. Otherwise, the GPU may quantize tensors, downcast values, or
  // process in FP16 to increase performance. For most models the precision
  // loss is warranted.
  // [OBSOLETE]: to be removed
  int32_t is_precision_loss_allowed;

  // Preference is defined in TfLiteGpuInferenceUsage.
  int32_t inference_preference;

  // Ordered priorities provide better control over desired semantics,
  // where priority(n) is more important than priority(n+1); therefore,
  // each time the inference engine needs to make a decision, it uses
  // ordered priorities to do so.
  // For example:
  // MAX_PRECISION at priority1 would not allow precision to be decreased,
  // but moving it to priority2 or priority3 would result in F16 calculation.
  //
  // Priority is defined in TfLiteGpuInferencePriority.
  // AUTO priority can only be used when higher priorities are fully specified.
  // For example:
  // VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  // VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //          priority3 = AUTO
  // INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  // INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //          priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  int32_t inference_priority1;
  int32_t inference_priority2;
  int32_t inference_priority3;

  // Bitmask flags. See the comments in TfLiteGpuExperimentalFlags.
  int64_t experimental_flags;

  // A graph could have multiple partitions that can be delegated to the GPU.
  // This limits the maximum number of partitions to be delegated. By default,
  // it's set to 1 in TfLiteGpuDelegateOptionsV2Default().
  int32_t max_delegated_partitions;

  // The nul-terminated directory to use for serialization.
  // Whether serialization actually happens or not is dependent on the backend
  // used and the validity of this directory.
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
  // delegate will not try serialization.
  //
  // NOTE: Users should ensure that this directory is private to the app to
  // avoid data access issues.
  const char* serialization_dir;

  // The unique nul-terminated token string that acts as a 'namespace' for
  // all serialization entries.
  // Should be unique to a particular model (graph & constants).
  // For an example of how to generate this from a TFLite model, see
  // StrFingerprint() in lite/delegates/serialization.h.
  //
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
  // delegate will not try serialization.
  const char* model_token;
} TfLiteGpuDelegateOptionsV2;
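
// A sketch (not part of the upstream header) of a valid ordered-priority
// configuration per the rules above, trading precision for latency:
//
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
//   options.inference_priority2 =
//       TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE;
//   options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO;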

// Populates TfLiteGpuDelegateOptionsV2 as follows:
//   is_precision_loss_allowed = false
//   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
//   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
//   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
//   max_delegated_partitions = 1
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();

// Creates a new delegate instance that needs to be destroyed with
// TfLiteGpuDelegateV2Delete when it is no longer used by TFLite.
//
// This delegate encapsulates multiple GPU-acceleration APIs under the hood to
// make use of the fastest one available on a device.
//
// When `options` is set to `nullptr`, the default options are used.
TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateV2Create(
    const TfLiteGpuDelegateOptionsV2* options);

// Destroys a delegate created with a `TfLiteGpuDelegateV2Create` call.
TFL_CAPI_EXPORT void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate);

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
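
For context, a minimal sketch of how native code could apply this delegate through the standard TFLite C++ API. `RunWithGpuDelegate` and the model path are illustrative names; everything else is either declared in the header above or in the regular TFLite headers.

#include <memory>

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

bool RunWithGpuDelegate(const char* model_path) {
  // Build an interpreter the usual way.
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (!model) return false;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) return false;

  // Start from the defaults, then pick the sustained-speed tradeoff since
  // the same delegate will serve many inputs.
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  options.inference_preference =
      TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;

  TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
  const bool ok =
      interpreter->ModifyGraphWithDelegate(delegate) == kTfLiteOk &&
      interpreter->AllocateTensors() == kTfLiteOk &&
      interpreter->Invoke() == kTfLiteOk;

  // The delegate must outlive the interpreter that uses it.
  interpreter.reset();
  TfLiteGpuDelegateV2Delete(delegate);
  return ok;
}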
Binary file not shown.
Binary file not shown.
Binary file not shown.

@@ -22,6 +22,10 @@ add_library( tensorflowlite SHARED IMPORTED )
set_target_properties( tensorflowlite PROPERTIES IMPORTED_LOCATION
    ${dependency_DIR}/tensorflowlite/jni/${ANDROID_ABI}/libtensorflowlite_jni.so )

add_library( tensorflowlite_gpu SHARED IMPORTED )
set_target_properties( tensorflowlite_gpu PROPERTIES IMPORTED_LOCATION
    ${dependency_DIR}/tensorflowlite/jni/${ANDROID_ABI}/libtensorflowlite_gpu_jni.so )

add_library( renderscript-intrinsics-replacement-toolkit SHARED IMPORTED )
set_target_properties( renderscript-intrinsics-replacement-toolkit PROPERTIES IMPORTED_LOCATION
    ${dependency_DIR}/renderscript-intrinsics-replacement-toolkit/jni/${ANDROID_ABI}/librenderscript-toolkit.so )

@@ -89,5 +93,6 @@ target_link_libraries( # Specifies the target library.
    ${log-lib}
    ${android-lib}
    tensorflowlite
    tensorflowlite_gpu
    renderscript-intrinsics-replacement-toolkit
    -fopenmp -static-openmp )
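
With `tensorflowlite_gpu` linked into the target, the delegate symbols can be referenced directly from native code. A minimal linkage sanity check, as a sketch (`SmokeTestGpuDelegate` is an illustrative name):

#include "tensorflow/lite/delegates/gpu/delegate.h"

// Creating and destroying a delegate with default options confirms that
// libtensorflowlite_gpu_jni.so was linked and resolves at runtime.
bool SmokeTestGpuDelegate() {
  TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(/*options=*/nullptr);
  if (delegate == nullptr) return false;
  TfLiteGpuDelegateV2Delete(delegate);
  return true;
}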