diff --git a/plugin/android/dependency/tensorflowlite/headers/tensorflow/lite/delegates/gpu/delegate.h b/plugin/android/dependency/tensorflowlite/headers/tensorflow/lite/delegates/gpu/delegate.h
deleted file mode 100644
index 3a1e1811..00000000
--- a/plugin/android/dependency/tensorflowlite/headers/tensorflow/lite/delegates/gpu/delegate.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
-#define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
-
-#include <stdint.h>
-
-#include "tensorflow/lite/c/common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// Encapsulated compilation/runtime tradeoffs.
-enum TfLiteGpuInferenceUsage {
-  // Delegate will be used only once, therefore, bootstrap/init time should
-  // be taken into account.
-  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,
-
-  // Prefer maximizing the throughput. Same delegate will be used repeatedly on
-  // multiple inputs.
-  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
-};
-
-enum TfLiteGpuInferencePriority {
-  // AUTO priority is needed when a single priority is the most important
-  // factor. For example,
-  // priority1 = MIN_LATENCY would result in the configuration that achieves
-  // maximum performance.
-  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
-  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
-  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
-  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
-};
-
-// Used to toggle experimental flags used in the delegate. Note that this is a
-// bitmask, so the values should be 1, 2, 4, 8, ...etc.
-enum TfLiteGpuExperimentalFlags {
-  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0,
-  // Enables inference on quantized models with the delegate.
-  // NOTE: This is enabled in TfLiteGpuDelegateOptionsV2Default.
-  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 1 << 0,
-  // Enforces execution with the provided backend.
-  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 1 << 1,
-  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 1 << 2,
-  // Enable serialization of GPU kernels & model data. Speeds up initilization
-  // at the cost of space on disk.
-  // Delegate performs serialization the first time it is applied with a new
-  // model or inference params. Later initializations are fast.
-  // ModifyGraphWithDelegate will fail if data cannot be serialized.
-  //
-  // NOTE: User also needs to set serialization_dir & model_token in
-  // TfLiteGpuDelegateOptionsV2.
-  // Currently works only if CL backend is used.
-  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION = 1 << 3,
-};
-
-// IMPORTANT: Always use TfLiteGpuDelegateOptionsV2Default() method to create
-// new instance of TfLiteGpuDelegateOptionsV2, otherwise every new added option
-// may break inference.
-typedef struct {
-  // When set to zero, computations are carried out in maximal possible
-  // precision. Otherwise, the GPU may quantify tensors, downcast values,
-  // process in FP16 to increase performance. For most models precision loss is
-  // warranted.
-  // [OBSOLETE]: to be removed
-  int32_t is_precision_loss_allowed;
-
-  // Preference is defined in TfLiteGpuInferenceUsage.
-  int32_t inference_preference;
-
-  // Ordered priorities provide better control over desired semantics,
-  // where priority(n) is more important than priority(n+1), therefore,
-  // each time inference engine needs to make a decision, it uses
-  // ordered priorities to do so.
-  // For example:
-  //   MAX_PRECISION at priority1 would not allow to decrease precision,
-  //   but moving it to priority2 or priority3 would result in F16 calculation.
-  //
-  // Priority is defined in TfLiteGpuInferencePriority.
-  // AUTO priority can only be used when higher priorities are fully specified.
-  // For example:
-  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
-  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
-  //            priority3 = AUTO
-  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
-  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
-  //            priority3 = MAX_PRECISION
-  // Invalid priorities will result in error.
-  int32_t inference_priority1;
-  int32_t inference_priority2;
-  int32_t inference_priority3;
-
-  // Bitmask flags. See the comments in TfLiteGpuExperimentalFlags.
-  int64_t experimental_flags;
-
-  // A graph could have multiple partitions that can be delegated to the GPU.
-  // This limits the maximum number of partitions to be delegated. By default,
-  // it's set to 1 in TfLiteGpuDelegateOptionsV2Default().
-  int32_t max_delegated_partitions;
-
-  // The nul-terminated directory to use for serialization.
-  // Whether serialization actually happens or not is dependent on backend used
-  // and validity of this directory.
-  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
-  // delegate will not try serialization.
-  //
-  // NOTE: Users should ensure that this directory is private to the app to
-  // avoid data access issues.
-  const char* serialization_dir;
-
-  // The unique nul-terminated token string that acts as a 'namespace' for
-  // all serialization entries.
-  // Should be unique to a particular model (graph & constants).
-  // For an example of how to generate this from a TFLite model, see
-  // StrFingerprint() in lite/delegates/serialization.h.
-  //
-  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
-  // delegate will not try serialization.
-  const char* model_token;
-} TfLiteGpuDelegateOptionsV2;
-
-// Populates TfLiteGpuDelegateOptionsV2 as follows:
-//   is_precision_loss_allowed = false
-//   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
-//   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
-//   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
-//   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
-//   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
-//   max_delegated_partitions = 1
-TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();
-
-// Creates a new delegate instance that need to be destroyed with
-// TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite.
-//
-// This delegate encapsulates multiple GPU-acceleration APIs under the hood to
-// make use of the fastest available on a device.
-//
-// When `options` is set to `nullptr`, then default options are used.
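Note: the serialization path documented in the header above is easy to misconfigure, because TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION silently does nothing unless serialization_dir and model_token are also set. A minimal sketch of a correctly populated options struct follows; the helper name and both string arguments are placeholders, and the strings must stay valid while the delegate initializes.

#include <tensorflow/lite/delegates/gpu/delegate.h>

// Sketch: kernel-cache serialization needs the CL backend plus both fields.
TfLiteDelegate *makeSerializingGpuDelegate(const char *cacheDir,
                                           const char *modelToken) {
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  options.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY;
  options.experimental_flags |=
      TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
  options.serialization_dir = cacheDir;  // must be app-private storage
  options.model_token = modelToken;      // e.g. StrFingerprint() of the model
  return TfLiteGpuDelegateV2Create(&options);
}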
-TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateV2Create(
-    const TfLiteGpuDelegateOptionsV2* options);
-
-// Destroys a delegate created with `TfLiteGpuDelegateV2Create` call.
-TFL_CAPI_EXPORT void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate);
-
-#ifdef __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
diff --git a/plugin/android/dependency/tensorflowlite/jni/arm64-v8a/libtensorflowlite_gpu_jni.so b/plugin/android/dependency/tensorflowlite/jni/arm64-v8a/libtensorflowlite_gpu_jni.so
deleted file mode 100644
index 2ecb985a..00000000
Binary files a/plugin/android/dependency/tensorflowlite/jni/arm64-v8a/libtensorflowlite_gpu_jni.so and /dev/null differ
diff --git a/plugin/android/dependency/tensorflowlite/jni/armeabi-v7a/libtensorflowlite_gpu_jni.so b/plugin/android/dependency/tensorflowlite/jni/armeabi-v7a/libtensorflowlite_gpu_jni.so
deleted file mode 100644
index f8fe62d0..00000000
Binary files a/plugin/android/dependency/tensorflowlite/jni/armeabi-v7a/libtensorflowlite_gpu_jni.so and /dev/null differ
diff --git a/plugin/android/dependency/tensorflowlite/jni/x86_64/libtensorflowlite_gpu_jni.so b/plugin/android/dependency/tensorflowlite/jni/x86_64/libtensorflowlite_gpu_jni.so
deleted file mode 100644
index 8bb03d44..00000000
Binary files a/plugin/android/dependency/tensorflowlite/jni/x86_64/libtensorflowlite_gpu_jni.so and /dev/null differ
diff --git a/plugin/android/src/main/cpp/CMakeLists.txt b/plugin/android/src/main/cpp/CMakeLists.txt
index 4884de70..834e8ded 100644
--- a/plugin/android/src/main/cpp/CMakeLists.txt
+++ b/plugin/android/src/main/cpp/CMakeLists.txt
@@ -22,10 +22,6 @@ add_library( tensorflowlite SHARED IMPORTED )
 set_target_properties( tensorflowlite PROPERTIES IMPORTED_LOCATION
     ${dependency_DIR}/tensorflowlite/jni/${ANDROID_ABI}/libtensorflowlite_jni.so )
 
-add_library( tensorflowlite_gpu SHARED IMPORTED )
-set_target_properties( tensorflowlite_gpu PROPERTIES IMPORTED_LOCATION
-    ${dependency_DIR}/tensorflowlite/jni/${ANDROID_ABI}/libtensorflowlite_gpu_jni.so )
-
 add_library( renderscript-intrinsics-replacement-toolkit SHARED IMPORTED )
 set_target_properties( renderscript-intrinsics-replacement-toolkit PROPERTIES IMPORTED_LOCATION
     ${dependency_DIR}/renderscript-intrinsics-replacement-toolkit/jni/${ANDROID_ABI}/librenderscript-toolkit.so )
@@ -93,6 +89,5 @@ target_link_libraries( # Specifies the target library.
     ${log-lib}
     ${android-lib}
     tensorflowlite
-    tensorflowlite_gpu
     renderscript-intrinsics-replacement-toolkit
     -fopenmp -static-openmp )
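The source changes that follow strip the delegate out of each inference path entirely. For comparison, a common middle ground keeps the GPU optional and falls back to CPU when the delegate cannot be applied on a given device; below is a rough sketch against the TFLite C API from tensorflow/lite/c/c_api.h, where the helper name is illustrative.

#include <tensorflow/lite/c/c_api.h>
#include <tensorflow/lite/delegates/gpu/delegate.h>

// Try the GPU delegate first; rebuild a plain CPU interpreter if it fails.
// On success *gpuOut holds the delegate, which must outlive the interpreter
// and be released with TfLiteGpuDelegateV2Delete after TfLiteInterpreterDelete.
TfLiteInterpreter *createInterpreterWithFallback(TfLiteModel *model,
                                                 TfLiteDelegate **gpuOut) {
  *gpuOut = nullptr;
  TfLiteGpuDelegateOptionsV2 gpuOptions = TfLiteGpuDelegateOptionsV2Default();
  TfLiteDelegate *gpuDelegate = TfLiteGpuDelegateV2Create(&gpuOptions);
  if (gpuDelegate) {
    TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
    TfLiteInterpreterOptionsAddDelegate(options, gpuDelegate);
    TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
    TfLiteInterpreterOptionsDelete(options);  // copied by Create; safe to free
    if (interpreter &&
        TfLiteInterpreterAllocateTensors(interpreter) == kTfLiteOk) {
      *gpuOut = gpuDelegate;
      return interpreter;
    }
    TfLiteInterpreterDelete(interpreter);
    TfLiteGpuDelegateV2Delete(gpuDelegate);
  }
  // CPU fallback with default interpreter options.
  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
  TfLiteInterpreterOptionsDelete(options);
  if (interpreter &&
      TfLiteInterpreterAllocateTensors(interpreter) != kTfLiteOk) {
    TfLiteInterpreterDelete(interpreter);
    return nullptr;
  }
  return interpreter;
}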
diff --git a/plugin/android/src/main/cpp/arbitrary_style_transfer.cpp b/plugin/android/src/main/cpp/arbitrary_style_transfer.cpp
index 29a88324..d34d9a53 100644
--- a/plugin/android/src/main/cpp/arbitrary_style_transfer.cpp
+++ b/plugin/android/src/main/cpp/arbitrary_style_transfer.cpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include <tensorflow/lite/delegates/gpu/delegate.h>
 #include
 
 using namespace plugin;
@@ -185,11 +184,6 @@ ArbitraryStyleTransfer::transfer(const uint8_t *image, const size_t width,
 vector ArbitraryStyleTransfer::predictStyle(const uint8_t *style) {
   InterpreterOptions options;
   options.setNumThreads(getNumberOfProcessors());
-
-  auto gpuOptions = TfLiteGpuDelegateOptionsV2Default();
-  auto gpuDelegate = AutoTfLiteDelegate(TfLiteGpuDelegateV2Create(&gpuOptions));
-  options.addDelegate(gpuDelegate.get());
-
   Interpreter interpreter(predictModel, options);
   interpreter.allocateTensors();
diff --git a/plugin/android/src/main/cpp/deep_lap_3.cpp b/plugin/android/src/main/cpp/deep_lap_3.cpp
index bbfce12c..dc4fe944 100644
--- a/plugin/android/src/main/cpp/deep_lap_3.cpp
+++ b/plugin/android/src/main/cpp/deep_lap_3.cpp
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include <tensorflow/lite/delegates/gpu/delegate.h>
 
 #include "./filter/saturation.h"
@@ -208,11 +207,6 @@ vector DeepLab3::infer(const uint8_t *image, const size_t width,
                        const size_t height) {
   InterpreterOptions options;
   options.setNumThreads(getNumberOfProcessors());
-
-  auto gpuOptions = TfLiteGpuDelegateOptionsV2Default();
-  auto gpuDelegate = AutoTfLiteDelegate(TfLiteGpuDelegateV2Create(&gpuOptions));
-  options.addDelegate(gpuDelegate.get());
-
   Interpreter interpreter(model, options);
   interpreter.allocateTensors();
diff --git a/plugin/android/src/main/cpp/tflite_wrapper.cpp b/plugin/android/src/main/cpp/tflite_wrapper.cpp
index 8a2fd177..9978fb9b 100644
--- a/plugin/android/src/main/cpp/tflite_wrapper.cpp
+++ b/plugin/android/src/main/cpp/tflite_wrapper.cpp
@@ -2,7 +2,6 @@
 #include "util.h"
 #include
 #include
-#include <tensorflow/lite/delegates/gpu/delegate.h>
 
 using namespace plugin;
 using namespace std;
@@ -100,10 +99,4 @@ const TfLiteTensor *Interpreter::getOutputTensor(const int32_t outputIndex) {
   return TfLiteInterpreterGetOutputTensor(interpreter, outputIndex);
 }
 
-AutoTfLiteDelegate::~AutoTfLiteDelegate() {
-  if (inst) {
-    TfLiteGpuDelegateV2Delete(inst);
-  }
-}
-
 } // namespace tflite
\ No newline at end of file
diff --git a/plugin/android/src/main/cpp/tflite_wrapper.h b/plugin/android/src/main/cpp/tflite_wrapper.h
index 0e57bd13..80d4fbdc 100644
--- a/plugin/android/src/main/cpp/tflite_wrapper.h
+++ b/plugin/android/src/main/cpp/tflite_wrapper.h
@@ -55,17 +55,4 @@ private:
   TfLiteInterpreter *interpreter = nullptr;
 };
 
-class AutoTfLiteDelegate {
-public:
-  explicit AutoTfLiteDelegate(TfLiteDelegate *inst) : inst(inst) {}
-  ~AutoTfLiteDelegate();
-
-  TfLiteDelegate &operator*() { return *inst; }
-  TfLiteDelegate *operator->() { return inst; }
-  TfLiteDelegate *get() { return inst; }
-
-private:
-  TfLiteDelegate *const inst;
-};
-
 } // namespace tflite
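AutoTfLiteDelegate, removed above, was a hand-written RAII guard whose only job was to call TfLiteGpuDelegateV2Delete. If the delegate ever comes back, the same ownership can be expressed without a custom class; here is a sketch using std::unique_ptr with a custom deleter, assuming the same TFLite headers this PR deletes.

#include <memory>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/delegates/gpu/delegate.h>

// std::unique_ptr never invokes its deleter on a null pointer, so the
// explicit null check from the removed destructor is unnecessary here.
struct GpuDelegateDeleter {
  void operator()(TfLiteDelegate *delegate) const {
    TfLiteGpuDelegateV2Delete(delegate);
  }
};
using GpuDelegatePtr = std::unique_ptr<TfLiteDelegate, GpuDelegateDeleter>;

// Usage, mirroring the blocks removed from the .cpp files above:
//   auto gpuOptions = TfLiteGpuDelegateOptionsV2Default();
//   GpuDelegatePtr gpuDelegate(TfLiteGpuDelegateV2Create(&gpuOptions));
//   options.addDelegate(gpuDelegate.get());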
diff --git a/plugin/android/src/main/cpp/zero_dce.cpp b/plugin/android/src/main/cpp/zero_dce.cpp
index d43c9ff6..55e9763c 100644
--- a/plugin/android/src/main/cpp/zero_dce.cpp
+++ b/plugin/android/src/main/cpp/zero_dce.cpp
@@ -11,7 +11,6 @@
 #include
 #include
 #include
-#include <tensorflow/lite/delegates/gpu/delegate.h>
 
 using namespace plugin;
 using namespace std;
@@ -85,11 +84,6 @@ vector ZeroDce::inferAlphaMaps(const uint8_t *image,
                                const size_t height) {
   InterpreterOptions options;
   options.setNumThreads(getNumberOfProcessors());
-
-  auto gpuOptions = TfLiteGpuDelegateOptionsV2Default();
-  auto gpuDelegate = AutoTfLiteDelegate(TfLiteGpuDelegateV2Create(&gpuOptions));
-  options.addDelegate(gpuDelegate.get());
-
   Interpreter interpreter(model, options);
   interpreter.allocateTensors();
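All three call sites now reduce to the same CPU-only setup: size the thread pool with getNumberOfProcessors() and build the interpreter. That helper lives in the project's util.h, which this diff does not show; a plausible implementation (an assumption, not the project's actual code) is the standard hardware-concurrency query.

#include <thread>

// Assumed implementation; util.h is not part of this diff.
// hardware_concurrency() may return 0 when the count is unknown,
// so fall back to a single thread in that case.
int getNumberOfProcessors() {
  const unsigned count = std::thread::hardware_concurrency();
  return count > 0 ? static_cast<int>(count) : 1;
}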