These notes document how I used TensorFlow Keras and an existing image dataset to train a simple image classification (Image Classification) model, deployed the model as a web service via Streamlit and GitHub for testing, then converted it into a TFLite model suitable for mobile devices and integrated it into an Android app. I also trained a TFLite model directly with the TFLite Model Maker library and compared the prediction accuracy of the two approaches.
Note: the TensorFlow Lite Model Maker library simplifies the process of adapting and converting a TensorFlow neural-network model to specific input data.
For learning and testing I used the Intel Image Classification dataset. It contains six classes of images, buildings, forest, glacier, mountain, sea, and street: over 14,000 training images (seg_train), around 3,000 test images (seg_test), and around 7,000 unlabeled prediction images (seg_pred), which is plenty for training a simple image classification model.
After downloading the dataset, upload the three folders seg_train, seg_pred, and seg_test, along with the photos inside them, to Google Drive, then open a Colab notebook for editing. Because the TFLite Model Maker package has known issues under Python 3.10, I set the notebook's Python runtime to version 3.9. Here is my Python 3.9 Playground notebook (it can serve as a starter template).
The starter template's contents are as follows:
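The notebook itself is not reproduced here, but a typical Colab recipe for pinning the runtime to Python 3.9 looks roughly like this (a sketch with assumed commands; the actual template may differ):

!sudo apt-get update -y
!sudo apt-get install -y python3.9 python3.9-distutils
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
!curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
!python3 --version  # should now report Python 3.9.x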
Next, create a file named image_pred.py and upload it to Google Drive. The complete Python script is as follows:
# Imports
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import matplotlib.pyplot as plt
import splitfolders
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
# Metadata writers (used in the later metadata step)
from tflite_support.metadata_writers import image_classifier
from tflite_support.metadata_writers import writer_utils

# Enable GPU memory growth (guarded so the script also runs on CPU-only runtimes)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)

# Preview a few training images
images = ["/content/seg_train/seg_train/sea/1.jpg",
          "/content/seg_train/seg_train/buildings/10006.jpg",
          "/content/seg_train/seg_train/forest/10010.jpg",
          "/content/seg_train/seg_train/glacier/10003.jpg",
          "/content/seg_train/seg_train/mountain/10000.jpg",
          "/content/seg_train/seg_train/street/10015.jpg",
          "/content/seg_train/seg_train/sea/10071.jpg",
          "/content/seg_train/seg_train/buildings/1001.jpg",
          "/content/seg_train/seg_train/forest/10007.jpg"]
plt.figure(figsize=(16, 8))
for i in range(9):
    img = plt.imread(images[i])
    plt.subplot(3, 3, i + 1)
    plt.imshow(img)
    plt.title(f"Image {i+1}")
    plt.axis("off")

# Splitting the data: 80% train / 20% validation
splitfolders.ratio('/content/seg_train/seg_train', output="output", seed=1337, ratio=(0.8, 0.2))

# Scaling and data augmentation
datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
test_gen = ImageDataGenerator(rescale=1.0/255)

# Use flow_from_directory to capture the images distributed in our 6 classes
train_generator = datagen.flow_from_directory(
    "/content/output/train",
    batch_size=32,
    target_size=(150, 150),
    class_mode="categorical"
)
validation_generator = test_gen.flow_from_directory(
    "/content/output/val",
    batch_size=32,
    target_size=(150, 150),
    class_mode="categorical"
)
test_generator = test_gen.flow_from_directory(
    "/content/seg_test/seg_test",
    target_size=(150, 150),
    shuffle=False,
    class_mode='categorical',
    batch_size=1)

# Building the model
input_size = 150
model = Sequential([
    Conv2D(input_shape=(input_size, input_size, 3), filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPool2D(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Flatten(name='flatten'),
    Dense(6, activation='softmax')
])
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_generator,
                    steps_per_epoch=11224 // 32,
                    validation_steps=2810 // 32,
                    epochs=15,  # Number of times to iterate over the entire dataset
                    validation_data=validation_generator)

# Plot training history
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()

# Evaluating the model
evaluation_results = model.evaluate(test_generator)
print("Test Loss:", evaluation_results[0])
print("Test Accuracy:", evaluation_results[1])

# Save the model (SavedModel format)
model.save('/content/output')

# Convert the Keras model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the tflite model
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

# Making predictions on a few test images
plt.figure(figsize=(16, 8))
test_images = ["/content/seg_test/seg_test/buildings/20064.jpg",
               "/content/seg_test/seg_test/forest/20100.jpg",
               "/content/seg_test/seg_test/forest/20166.jpg",
               "/content/seg_test/seg_test/glacier/20109.jpg",
               "/content/seg_test/seg_test/glacier/20198.jpg",
               "/content/seg_test/seg_test/mountain/20071.jpg",
               "/content/seg_test/seg_test/sea/20081.jpg",
               "/content/seg_test/seg_test/street/20070.jpg",
               "/content/seg_test/seg_test/street/20127.jpg"]
labels = {0: "buildings", 1: "forest", 2: "glacier", 3: "mountain", 4: "sea", 5: "street"}
for i in range(9):
    img = image.load_img(test_images[i], target_size=(150, 150))
    plt.subplot(3, 3, i + 1)
    plt.imshow(img)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array /= 255.0  # match the 1/255 rescaling used in training
    predictions = model.predict(img_array)
    plt.title(f"Image of {labels[np.argmax(predictions)]}")
    plt.axis("off")
This script uses deep learning (Deep Learning) to train and produce an image classification model (Image Classification Model).
It implements a simple convolutional neural network (Convolutional Neural Network, CNN) with Keras's Conv2D convolution layer, then trains and evaluates the CNN on the Intel Image Classification dataset.
Note: Keras is the high-level API built on TensorFlow 2; it is a highly abstracted deep-learning framework that is simple and easy to use.
Next, add the following Linux commands to the notebook. They copy the script into the Colab runtime's working directory and set up the directory layout:
!cp -v /content/gdrive/MyDrive/tflite-model-maker-workaround/image_pred.py .
!mkdir -p seg_pred && ln -s /content/gdrive/MyDrive/tflite-model-maker-workaround/seg_pred ./seg_pred/seg_pred
!mkdir -p seg_train && ln -s /content/gdrive/MyDrive/tflite-model-maker-workaround/seg_train ./seg_train/seg_train
!mkdir -p seg_test && ln -s /content/gdrive/MyDrive/tflite-model-maker-workaround/seg_test ./seg_test/seg_test
!mkdir -p ./output/train
!mkdir -p ./output/val
The script uses the splitfolders function to split the dataset into a training set and a validation set, at a ratio of 80% to 20%.
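This is the corresponding call in image_pred.py:

# Write an 80/20 train/validation split of seg_train into /content/output/{train,val}
splitfolders.ratio('/content/seg_train/seg_train', output="output", seed=1337, ratio=(0.8, 0.2))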
Then create an image data generator named datagen (an ImageDataGenerator) with several parameters that perform image data augmentation.
What each parameter means:
- rescale=1.0/255: scales pixel values from [0, 255] down to [0, 1]
- rotation_range=20: rotates images randomly by up to 20 degrees
- width_shift_range=0.2 / height_shift_range=0.2: shifts images horizontally/vertically by up to 20% of their size
- shear_range=0.2: applies random shear transformations
- zoom_range=0.2: zooms in or out randomly by up to 20%
- horizontal_flip=True: flips images horizontally at random
- fill_mode='nearest': fills pixels exposed by the transformations with the nearest original pixel value
Why normalize this way? Normalization helps when training neural networks because it keeps the input data in a consistent range and prevents particular features from dominating training. Scaling pixel values into the [0, 1] or [-1, 1] range is a common preprocessing step that aids the network's convergence and training quality.
Next, create three data generators, one each for the training, validation, and test sets. These generators load and preprocess the images and feed them to the model in batches for training and evaluation.
The flow_from_directory call in the code draws training samples from the ImageDataGenerator for the model to use in each epoch; it solves the problem of a dataset too large to load into memory all at once.
# Scaling and data augmentation
datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
test_gen = ImageDataGenerator(rescale=1.0/255)

# Use flow_from_directory to capture the images distributed in our 6 classes
train_generator = datagen.flow_from_directory(
    "/content/output/train",
    batch_size=32,
    target_size=(150, 150),
    class_mode="categorical"
)
validation_generator = test_gen.flow_from_directory(
    "/content/output/val",
    batch_size=32,
    target_size=(150, 150),
    class_mode="categorical"
)
test_generator = test_gen.flow_from_directory(
    "/content/seg_test/seg_test",
    target_size=(150, 150),
    shuffle=False,
    class_mode='categorical',
    batch_size=1)
The following code builds the model:
# Building the model
input_size = 150
model = Sequential([
    Conv2D(input_shape=(input_size, input_size, 3), filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPool2D(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Conv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(),
    Flatten(name='flatten'),
    Dense(6, activation='softmax')
])
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_generator,
                    steps_per_epoch=11224 // 32,
                    validation_steps=2810 // 32,
                    epochs=15,  # Number of times to iterate over the entire dataset
                    validation_data=validation_generator)
Here Keras is used to build a convolutional neural network (CNN) model for the image classification task. First, the Sequential model class and several common layers are imported from the Keras library: Conv2D (2-D convolution), MaxPool2D (2-D max pooling), Flatten, and Dense (fully connected).
A sequential model is created first and stored in the model variable; the network layers are added to this sequential model in order.
The first convolution layer added here uses 32 filters of size 3x3 with the ReLU activation function.
input_shape is set to (input_size, input_size, 3), meaning the input images are input_size x input_size with 3 channels (RGB color images). padding='same' applies zero-padding so the input and output keep the same spatial size. Next, a max-pooling layer is added, which reduces the size of the feature maps.
After that, more convolution and pooling layers are added in turn. Each convolution layer increases the number of filters so the model can capture more features. After the last pooling layer, a Flatten layer converts the feature maps into a one-dimensional vector so they can be connected to the fully connected layer that follows.
The last layer is a fully connected layer with 6 output nodes and a Softmax activation, producing a 6-dimensional vector: the probability that the input image belongs to each of the 6 classes.
In summary, the script defines a simple convolutional neural network for image classification. The model extracts image features through successive convolution and pooling layers, then classifies with a fully connected layer. Its input images are 150x150 and its output is a probability distribution over 6 classes.
model.summary() displays a summary of the model, including each layer's name, output shape, parameter count, and so on.
The compile method is then used to compile the model. The Adam optimizer is used with a learning rate of 0.001; the optimizer adjusts the model's parameters to minimize the loss function. The loss function is set to categorical crossentropy, the usual loss for multi-class classification tasks. Finally, the metric to track during training is specified; accuracy is chosen here.
# Evaluating the model
evaluation_results = model.evaluate(test_generator)
print("Test Loss:", evaluation_results[0])
print("Test Accuracy:", evaluation_results[1])

# Save the model (SavedModel format)
model.save('/content/output')

# Convert the Keras model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the tflite model
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)
This part of the code evaluates the trained model's performance on the test set. model.evaluate(test_generator) evaluates the model using the test_generator data generator and returns a list containing the loss and the accuracy, which are then printed as the test-set loss and accuracy.
The trained model is then saved to the '/content/output' path; at this point a model file named saved_model.pb appears under the output folder in the Colab sidebar.
Because we will later integrate this model into an Android app, we use the TensorFlow Lite converter here to convert the Keras model into a TensorFlow Lite model (the .tflite file format).
First, tf.lite.TFLiteConverter.from_keras_model(model) creates a converter object from the Keras model. Calling converter.convert() then performs the conversion and yields the TensorFlow Lite model.
Finally, the converted TensorFlow Lite model is saved to a file named 'model.tflite'. We can then download this model to run it on resource-constrained embedded devices.
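In Colab, one convenient way to pull the file down to your machine (a small convenience snippet, not part of the original script) is:

# Download model.tflite from the Colab runtime to the local machine
from google.colab import files
files.download('model.tflite')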
Now just run the code in the Colab notebook to produce the model; in this example it takes a few tens of minutes. The architecture summary produced by Keras's model.summary() function shows each layer's name, output shape, and parameter count, which makes the model's design and configuration easy to understand.
The conv2d layer's output shape is (None, 150, 150, 32), meaning the output is a 150x150 feature map with 32 channels. Likewise, the max_pooling2d layer's output shape is (None, 75, 75, 32): the spatial size is halved and the channel count is unchanged.
The flatten layer flattens the last pooling layer's output into a one-dimensional vector of shape (None, 4096). The final dense layer is the fully connected layer, with output shape (None, 6): the model ends with 6 output nodes (6 classes).
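For reference, working the shapes through (five 'same'-padded convolutions, each followed by a 2x2 max pool: 150 → 75 → 37 → 18 → 9 → 4, and 4 x 4 x 256 = 4096), the summary should look roughly as follows. This listing is reconstructed from the architecture above, not copied from the Colab output:

Layer (type)                   Output Shape          Param #
conv2d (Conv2D)                (None, 150, 150, 32)  896
max_pooling2d (MaxPooling2D)   (None, 75, 75, 32)    0
conv2d_1 (Conv2D)              (None, 75, 75, 64)    18496
max_pooling2d_1 (MaxPooling2D) (None, 37, 37, 64)    0
conv2d_2 (Conv2D)              (None, 37, 37, 64)    36928
max_pooling2d_2 (MaxPooling2D) (None, 18, 18, 64)    0
conv2d_3 (Conv2D)              (None, 18, 18, 128)   73856
max_pooling2d_3 (MaxPooling2D) (None, 9, 9, 128)     0
conv2d_4 (Conv2D)              (None, 9, 9, 256)     295168
max_pooling2d_4 (MaxPooling2D) (None, 4, 4, 256)     0
flatten (Flatten)              (None, 4096)          0
dense (Dense)                  (None, 6)             24582
Total params: 449,926 (all trainable)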
How do we integrate the model into an Android app? In short, we need an app that can open the camera and run predictions on what the camera sees, or pick a photo from the device's gallery and run a prediction on it.
I only needed a simple sample app to test the model, so there was no point spending time writing one myself. On the TensorFlow tutorial site I found a simple Android example (there are iOS and Raspberry Pi examples too), and we can download this sample code and use it directly.
Just download the example, open the project in Android Studio, select the finish module inside it, then build and run it on a phone.
While building this example, I ran into two problems. The first error was:
Unable to make field private final java.lang.String java.io.File.path accessible: module java.base does not "opens java.io" to unnamed module
Fix: upgrade the Android Gradle Plugin version to 7.0.3 in android/build.gradle.
The second error was:
java.lang.IllegalAccessError: class org.jetbrains.kotlin.kapt3.base.KaptContext (in unnamed module @0x74f83670) cannot access class com.sun.tools.javac.util.Context (in module jdk.compiler) because module jdk.compiler does not export com.sun.tools.javac.util to unnamed module
Fix: upgrade the Kotlin Gradle Plugin version to 1.6.0 or later.
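Both fixes amount to version bumps in the project-level Gradle build script. A sketch of where they go, assuming the sample project's standard layout (the exact file contents may differ):

// android/build.gradle (project level), assumed layout
buildscript {
    dependencies {
        classpath 'com.android.tools.build:gradle:7.0.3'            // Android Gradle Plugin 7.0.3
        classpath 'org.jetbrains.kotlin:kotlin-gradle-plugin:1.6.0' // Kotlin Gradle Plugin 1.6.0+
    }
}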
With those problems solved, I copied the TFLite model converted earlier into the project's ml folder, but the Android Studio editor then showed a Type mismatch error. The cause is that the converted model carries no TFLite metadata (class labels and input normalization parameters), which the sample app expects. The following script writes a labels.txt file and embeds the metadata into the model:
import tensorflow as tf
assert tf.__version__.startswith('2')

from tflite_support.metadata_writers import image_classifier
from tflite_support.metadata_writers import writer_utils

# Write the class labels, one per line
labels_list = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
with open('labels.txt', 'w') as labels_file:
    for label in labels_list:
        labels_file.write(label + "\n")

# Generate the metadata
ImageClassifierWriter = image_classifier.MetadataWriter

# Normalization parameters are required when processing the image
# https://www.tensorflow.org/lite/convert/metadata#normalization_and_quantization_parameters
# Note: mean/std of 127.5 declare a [-1, 1] input range; a model trained with
# rescale=1/255 (a [0, 1] range) would strictly correspond to mean=0, std=255.
_INPUT_NORM_MEAN = 127.5
_INPUT_NORM_STD = 127.5
_TFLITE_MODEL_PATH = "model.tflite"
_LABELS_FILE = "labels.txt"
_TFLITE_METADATA_MODEL_PATHS = "converted_model_with_metadata.tflite"

# Create the metadata writer
metadata_generator = ImageClassifierWriter.create_for_inference(
    writer_utils.load_file(_TFLITE_MODEL_PATH),
    [_INPUT_NORM_MEAN], [_INPUT_NORM_STD],
    [_LABELS_FILE])

# Verify the metadata generated
print("Verify the metadata generated")
print(metadata_generator.get_metadata_json())

# Integrate the metadata into the TFLite model
writer_utils.save_file(metadata_generator.populate(), _TFLITE_METADATA_MODEL_PATHS)
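After copying the metadata-equipped converted_model_with_metadata.tflite into the project's ml folder in place of the bare model, the Type mismatch error should no longer appear.
Besides the Android app, as mentioned at the start I also deployed the model as a small web service for testing, using Streamlit with the code hosted on GitHub. The Streamlit app below loads the TFLite model with the TensorFlow Lite Interpreter, accepts an uploaded image, and shows the predicted class (the Models/ paths reflect my repository layout):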
import streamlit as st
import tensorflow as tf
import os
import numpy as np
from pathlib import Path

# temp_path = Path(__file__).parent / "tempDir"

class_names = ["buildings", "forest", "glacier", "mountain", "sea", "street"]

## Page title
st.set_page_config(page_title="Image Classification")
st.title("Image Classification")
st.markdown("---")

## Sidebar (optional model selection, kept commented out for reference)
# st.sidebar.header("TF Lite Models")
# display = ("Select a Model", "Created FP-16 Quantized Model", "Created Quantized Model", "Created Dynamic Range Quantized Model")
# options = list(range(len(display)))
# value = st.sidebar.selectbox("Model", options, format_func=lambda x: display[x])
# print(value)

# Load the TFLite model and allocate its input/output tensors
tflite_interpreter = tf.lite.Interpreter(model_path='Models/image_classify_model.tflite')
tflite_interpreter.allocate_tensors()

# if value == 1:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/model_fp16.tflite')
#     tflite_interpreter.allocate_tensors()
# if value == 2:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/model_int8.tflite')
#     tflite_interpreter.allocate_tensors()
# if value == 3:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/model_dynamic.tflite')
#     tflite_interpreter.allocate_tensors()
# if value == 4:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/created_model_fp16.tflite')
#     tflite_interpreter.allocate_tensors()
# if value == 5:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/created_model_int8.tflite')
#     tflite_interpreter.allocate_tensors()
# if value == 6:
#     tflite_interpreter = tf.lite.Interpreter(model_path='Models/created_model_dynamic.tflite')
#     tflite_interpreter.allocate_tensors()

def set_input_tensor(interpreter, image):
    """Sets the input tensor."""
    tensor_index = interpreter.get_input_details()[0]['index']
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :, :] = image

def get_predictions(input_image):
    """Runs inference and returns the predicted class name."""
    output_details = tflite_interpreter.get_output_details()
    set_input_tensor(tflite_interpreter, input_image)
    tflite_interpreter.invoke()
    tflite_model_prediction = tflite_interpreter.get_tensor(output_details[0]["index"])
    tflite_model_prediction = tflite_model_prediction.squeeze().argmax(axis=0)
    pred_class = class_names[tflite_model_prediction]
    return pred_class

## Input fields
uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
if uploaded_file is not None:
    with open(os.path.join("Models", uploaded_file.name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    path = os.path.join("Models", uploaded_file.name)
    img = tf.keras.preprocessing.image.load_img(path, color_mode='rgb', target_size=(150, 150), interpolation='nearest')
    st.image(img)
    # if value == 2 or value == 5:
    #     img = tf.image.convert_image_dtype(img, tf.uint8)
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = img_array / 255.0  # scale to [0, 1] to match the 1/255 rescaling used in training
    img_array = tf.expand_dims(img_array, 0)
    if st.button("Get Predictions"):
        suggestion = get_predictions(input_image=img_array)
        st.success(suggestion)
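Finally, as mentioned at the beginning, the TFLite Model Maker library can also train a TFLite model directly from the same image folders. Instead of training a CNN from scratch, it applies transfer learning (here on the efficientnet_lite0 model spec), giving us a second model whose prediction accuracy can be compared with the converted Keras model: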
from PIL import Image
import glob
import os
from pathlib import Path
import matplotlib.pyplot as plt

import tensorflow as tf
assert tf.__version__.startswith('2')

from tflite_model_maker import model_spec
from tflite_model_maker import image_classifier
from tflite_model_maker.config import ExportFormat
from tflite_model_maker.config import QuantizationConfig
from tflite_model_maker.image_classifier import DataLoader

# Load the dataset using the DataLoader
data = DataLoader.from_folder('/content/seg_train/seg_train')

# Split the dataset: 90% training data, 10% test data
train_data, test_data = data.split(0.9)

# Train the model (transfer learning on EfficientNet-Lite0, fine-tuning the whole model)
model = image_classifier.create(train_data, model_spec=model_spec.get('efficientnet_lite0'), train_whole_model=True)

# Have a look at the detailed model structure
model.summary()

# Evaluate on the held-out test data
loss, accuracy = model.evaluate(test_data)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Export the TFLite model (model.tflite) to the current directory
model.export(export_dir='.')