Build an Image Recognition API with Go and TensorFlow

October 18, 2017

This tutorial shows how to build an image recognition service in Go using a pre-trained TensorFlow Inception-V3 model. The service will run inside a Docker container, use the TensorFlow Go package to process images, and return the labels that best describe them.

Full source code is available on GitHub.

Getting started

Install Docker and Docker Compose.

Configure container

Inside the project's root directory, create a docker-compose.yaml file.

# Compose file format version
version: '3.3'
services:
  # The image recognition API service
  api:
    container_name: 'api'
    # Build the image from the ./api directory
    build: './api'
    ports:
      # Publish container port 8080 on host port 8080
      - '8080:8080'
    volumes:
      # Mount the local ./api directory at /go/src/app inside the container
      - './api:/go/src/app'

Create the api/Dockerfile file. It uses the official TensorFlow Docker image as its base image. You'll also need to install the TensorFlow C library to use the Go bindings.

# Use the tensorflow/tensorflow image as the base
FROM tensorflow/tensorflow
# Install TensorFlow C library
# (CPU build 1.3.0 for linux-x86_64, streamed from curl into tar and unpacked under /usr/local)
RUN curl -L \
   "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-1.3.0.tar.gz" | \
   tar -C "/usr/local" -xz
# Refresh the dynamic linker cache after adding the library
RUN ldconfig
# Hide some warnings
ENV TF_CPP_MIN_LOG_LEVEL 2

Next, in the same file, install Go itself. This configuration is copied from official Docker golang image.

# Install the C/C++ toolchain plus make, pkg-config, wget and git,
# then delete the apt package lists in the same layer
RUN apt-get update && apt-get install -y --no-install-recommends \
    g++ \
    gcc \
    libc6-dev \
    make \
    pkg-config \
    wget \
    git \
  && rm -rf /var/lib/apt/lists/*

# Go release to install
ENV GOLANG_VERSION 1.9.1
# Select the release archive and its SHA-256 for the current dpkg architecture,
# download it, verify the checksum and unpack it into /usr/local.
# Architectures without a binary release fall back to the source archive and
# then abort the build, because building from source is not implemented.
# The final "go version" call checks that the installed toolchain runs.
RUN set -eux; \
  \
  dpkgArch="$(dpkg --print-architecture)"; \
  case "${dpkgArch##*-}" in \
    amd64) goRelArch='linux-amd64'; goRelSha256='07d81c6b6b4c2dcf1b5ef7c27aaebd3691cdb40548500941f92b221147c5d9c7' ;; \
    armhf) goRelArch='linux-armv6l'; goRelSha256='65a0495a50c7c240a6487b1170939586332f6c8f3526abdbb9140935b3cff14c' ;; \
    arm64) goRelArch='linux-arm64'; goRelSha256='d31ecae36efea5197af271ccce86ccc2baf10d2e04f20d0fb75556ecf0614dad' ;; \
    i386) goRelArch='linux-386'; goRelSha256='2cea1ce9325cb40839601b566bc02b11c92b2942c21110b1b254c7e72e5581e7' ;; \
    ppc64el) goRelArch='linux-ppc64le'; goRelSha256='de57b6439ce9d4dd8b528599317a35fa1e09d6aa93b0a80e3945018658d963b8' ;; \
    s390x) goRelArch='linux-s390x'; goRelSha256='9adf03574549db82a72e0d721ef2178ec5e51d1ce4f309b271a2bca4dcf206f6' ;; \
    *) goRelArch='src'; goRelSha256='a84afc9dc7d64fe0fa84d4d735e2ece23831a22117b50dafc75c1484f1cb550e'; \
      echo >&2; echo >&2 "warning: current architecture ($dpkgArch) does not have a corresponding Go binary release; will be building from source"; echo >&2 ;; \
  esac; \
  \
  url="https://golang.org/dl/go${GOLANG_VERSION}.${goRelArch}.tar.gz"; \
  wget -O go.tgz "$url"; \
  echo "${goRelSha256} *go.tgz" | sha256sum -c -; \
  tar -C /usr/local -xzf go.tgz; \
  rm go.tgz; \
  \
  if [ "$goRelArch" = 'src' ]; then \
    echo >&2; \
    echo >&2 'error: UNIMPLEMENTED'; \
    echo >&2 'TODO install golang-any from jessie-backports for GOROOT_BOOTSTRAP (and uninstall after build)'; \
    echo >&2; \
    exit 1; \
  fi; \
  \
  export PATH="/usr/local/go/bin:$PATH"; \
  go version

# Go workspace location
ENV GOPATH /go
# Put workspace binaries and the Go toolchain on PATH
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH

# Create the workspace directories and open their permissions to all users
RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH"

Install necessary Go packages.

# Fetch the TensorFlow Go bindings, their op package and the httprouter package
RUN go get github.com/tensorflow/tensorflow/tensorflow/go \
  github.com/tensorflow/tensorflow/tensorflow/go/op \
  github.com/julienschmidt/httprouter

Download Inception model archive to the /model directory inside the container and extract it.

# Download the model archive into /model, unpack it there and
# open the permissions on the extracted files to all users
RUN mkdir -p /model && \
  wget "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip" -O /model/inception.zip && \
  unzip /model/inception.zip -d /model && \
  chmod -R 777 /model

Create a user.

# Add a user named "api" with password login disabled and empty GECOS fields
RUN adduser --disabled-password --gecos '' api
# Run the remaining instructions and the container process as that user
USER api

Copy over source files, install the app and run it.

# Work from the app's directory inside the Go workspace
WORKDIR "/go/src/app"
# Copy the build context (the api directory) into the working directory
COPY . .
# Build and install all packages; binaries go to $GOPATH/bin, which is on PATH
RUN go install -v ./...
# Start the installed binary, named "app" after its directory
CMD [ "app" ]

Create the api/main.go file.

package main

import "fmt"

// main prints a greeting so you can confirm that the container builds and runs.
func main() {
  const greeting = "Hello, Gophers"
  fmt.Println(greeting)
}

Now build the container and make sure everything works.

docker-compose -f docker-compose.yaml up --build

Loading TensorFlow model

Extracted files inside /model include a serialized TensorFlow graph and a list of all labels, which can be inferred from images. You need to read these files and parse them.

Declare some variables and update the main function.

var (
  // graph is the TensorFlow graph that loadModel imports the model into.
  graph  *tf.Graph
  // labels holds the lines of the labels file read by loadModel, one label per line.
  labels []string
)

// main loads the model and its labels and terminates the program if that fails.
func main() {
  if err := loadModel(); err != nil {
    // log.Fatal logs the error and exits the process, so no return is needed.
    log.Fatal(err)
  }
}

Imports are skipped for brevity. You can use goimports tool to add them. Your code editor most likely has a plugin for it.

Write the loadModel function.

// loadModel reads the serialized graph and the labels file from /model and
// stores them in the package-level graph and labels variables.
// It returns the first error encountered while reading or importing.
func loadModel() error {
  // Read the serialized graph definition and import it into a fresh graph
  graphDef, err := ioutil.ReadFile("/model/tensorflow_inception_graph.pb")
  if err != nil {
    return err
  }
  graph = tf.NewGraph()
  if err = graph.Import(graphDef, ""); err != nil {
    return err
  }

  // Read the labels file; every line is one label
  f, err := os.Open("/model/imagenet_comp_graph_label_strings.txt")
  if err != nil {
    return err
  }
  defer f.Close()
  lines := bufio.NewScanner(f)
  for lines.Scan() {
    labels = append(labels, lines.Text())
  }
  // Err is nil when the scanner simply reached the end of the file
  return lines.Err()
}

Uploading images

Write some utility functions for later use.

// responseError writes message as a JSON object of the form
// {"error": message} using the given HTTP status code.
func responseError(w http.ResponseWriter, message string, code int) {
  w.Header().Set("Content-Type", "application/json")
  w.WriteHeader(code)
  if err := json.NewEncoder(w).Encode(map[string]string{"error": message}); err != nil {
    // The status line has already been sent, so the failure can only be logged.
    log.Printf("writing error response: %v", err)
  }
}

// responseJSON writes data as a JSON response with status 200.
//
// data is encoded into a buffer before anything is sent, so a value that
// cannot be encoded produces a 500 JSON error response instead of a 200
// with an empty body.
func responseJSON(w http.ResponseWriter, data interface{}) {
  var body bytes.Buffer
  if err := json.NewEncoder(&body).Encode(data); err != nil {
    log.Printf("encoding response: %v", err)
    responseError(w, "Could not encode response", http.StatusInternalServerError)
    return
  }
  w.Header().Set("Content-Type", "application/json")
  if _, err := w.Write(body.Bytes()); err != nil {
    log.Printf("writing response: %v", err)
  }
}

// main loads the model, registers the POST /recognize route and serves HTTP
// on port 8080. It terminates the program if the model cannot be loaded or
// the server stops.
func main() {
  if err := loadModel(); err != nil {
    // log.Fatal logs the error and exits the process, so no return is needed.
    log.Fatal(err)
  }
  r := httprouter.New()
  r.POST("/recognize", recognizeHandler)
  // ListenAndServe always returns a non-nil error; log it and exit.
  log.Fatal(http.ListenAndServe(":8080", r))
}

Write recognizeHandler function. Image files will be located under form key "image".

// recognizeHandler handles POST /recognize. It reads the uploaded file from
// the form key "image" and buffers its contents for further processing.
// A missing or unreadable upload produces a 400 JSON error response.
func recognizeHandler(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  // Read image
  imageFile, header, err := r.FormFile("image")
  // Check the error before using header: it is nil when FormFile fails
  if err != nil {
    responseError(w, "Could not read image", http.StatusBadRequest)
    return
  }
  defer imageFile.Close()
  // Will contain filename and extension
  imageName := strings.Split(header.Filename, ".")
  var imageBuffer bytes.Buffer
  // Copy image data to a buffer
  if _, err := io.Copy(&imageBuffer, imageFile); err != nil {
    responseError(w, "Could not read image", http.StatusBadRequest)
    return
  }

  // ...
}

Normalizing images

Before classifying an image, you need to convert it to a tensor and normalize it, because Inception model expects it to be in a certain format.

In recognizeHandler, call makeTensorFromImage, passing in the buffer and the file extension.

tensor, err := makeTensorFromImage(&imageBuffer, imageName[:1][0])
if err != nil {
  responseError(w, "Invalid image", http.StatusBadRequest)
  return
}

Write makeTensorFromImage function which runs an image tensor through the normalization graph.

// makeTensorFromImage converts the raw image bytes in imageBuffer into a
// tensor by running them through the graph built by makeTransformImageGraph
// for the given imageFormat. It returns the first output of that run, or the
// first error encountered.
func makeTensorFromImage(imageBuffer *bytes.Buffer, imageFormat string) (*tf.Tensor, error) {
  // Wrap the raw bytes in a string tensor to feed into the transform graph
  raw, err := tf.NewTensor(imageBuffer.String())
  if err != nil {
    return nil, err
  }
  transform, in, out, err := makeTransformImageGraph(imageFormat)
  if err != nil {
    return nil, err
  }
  sess, err := tf.NewSession(transform, nil)
  if err != nil {
    return nil, err
  }
  defer sess.Close()

  feeds := map[tf.Output]*tf.Tensor{in: raw}
  fetches := []tf.Output{out}
  results, err := sess.Run(feeds, fetches, nil)
  if err != nil {
    return nil, err
  }
  return results[0], nil
}

Write makeTransformImageGraph function to construct a graph which scales images to 224x224 and normalizes their pixel values.

// makeTransformImageGraph builds a graph that decodes an encoded image,
// resizes it to 224x224 and normalizes its pixel values.
//
// imageFormat selects the decoder: "png" uses DecodePng, anything else uses
// DecodeJpeg. input is the string placeholder to feed the encoded image into;
// output is the normalized result.
func makeTransformImageGraph(imageFormat string) (graph *tf.Graph, input, output tf.Output, err error) {
  const (
    height, width = 224, 224
    mean          = float32(117)
    scale         = float32(1)
  )
  s := op.NewScope()
  input = op.Placeholder(s, tf.String)

  // Decode PNG or JPEG into 3 channels
  var pixels tf.Output
  switch imageFormat {
  case "png":
    pixels = op.DecodePng(s, input, op.DecodePngChannels(3))
  default:
    pixels = op.DecodeJpeg(s, input, op.DecodeJpegChannels(3))
  }

  // Use decoded pixel values as floats
  asFloat := op.Cast(s, pixels, tf.Float)
  // Create a batch containing a single image
  batch := op.ExpandDims(s, asFloat, op.Const(s.SubScope("make_batch"), int32(0)))
  // Resize to 224x224 with bilinear interpolation
  resized := op.ResizeBilinear(s, batch, op.Const(s.SubScope("size"), []int32{height, width}))
  // Sub and Div perform (value-mean)/scale for each pixel
  centered := op.Sub(s, resized, op.Const(s.SubScope("mean"), mean))
  output = op.Div(s, centered, op.Const(s.SubScope("scale"), scale))

  graph, err = s.Finalize()
  return graph, input, output, err
}

Running inference

Back in recognizeHandler function, run normalized image tensor through the Inception model graph.

// Create a session for the model graph. A failure here affects only this
// request, so report it to the client instead of terminating the server.
session, err := tf.NewSession(graph, nil)
if err != nil {
  log.Printf("creating session: %v", err)
  responseError(w, "Could not create session", http.StatusInternalServerError)
  return
}
defer session.Close()
// Feed the image tensor to the "input" operation and fetch the "output" operation
output, err := session.Run(
  map[tf.Output]*tf.Tensor{
    graph.Operation("input").Output(0): tensor,
  },
  []tf.Output{
    graph.Operation("output").Output(0),
  },
  nil)
if err != nil {
  responseError(w, "Could not run inference", http.StatusInternalServerError)
  return
}

The output[0].Value() tensor now contains probabilities of each label. The probability represents how well a label describes the image.

Finding best labels

Lastly, return top 5 labels.

Declare some structs to hold the response data.

type (
  // ClassifyResult is the JSON response body: a filename together with
  // its labels.
  ClassifyResult struct {
    Filename string        `json:"filename"`
    Labels   []LabelResult `json:"labels"`
  }

  // LabelResult pairs a label with its probability.
  LabelResult struct {
    Label       string  `json:"label"`
    Probability float32 `json:"probability"`
  }
)

Write findBestLabels function which finds best labels and their probabilities.

// ByProbability implements sort.Interface for []LabelResult, ordering entries
// from the highest probability to the lowest.
type ByProbability []LabelResult

// Len returns the number of entries.
func (a ByProbability) Len() int {
  return len(a)
}

// Swap exchanges the entries at positions i and j.
func (a ByProbability) Swap(i, j int) {
  a[j], a[i] = a[i], a[j]
}

// Less reports whether entry i has a higher probability than entry j, which
// makes sort.Sort produce descending order.
func (a ByProbability) Less(i, j int) bool {
  return a[j].Probability < a[i].Probability
}

// findBestLabels pairs each probability with the label at the same index and
// returns up to five pairs with the highest probabilities, best first.
//
// Probabilities without a matching entry in labels are ignored. When fewer
// than five pairs exist, all of them are returned instead of slicing past the
// end of the result.
func findBestLabels(probabilities []float32) []LabelResult {
  const topN = 5
  // Only indexes present in both slices can be paired
  n := len(probabilities)
  if len(labels) < n {
    n = len(labels)
  }
  // Make a list of label/probability pairs
  resultLabels := make([]LabelResult, 0, n)
  for i := 0; i < n; i++ {
    resultLabels = append(resultLabels, LabelResult{Label: labels[i], Probability: probabilities[i]})
  }
  // Sort by probability
  sort.Sort(ByProbability(resultLabels))
  // Return at most topN labels
  if len(resultLabels) > topN {
    resultLabels = resultLabels[:topN]
  }
  return resultLabels
}

Write the response at the end of recognizeHandler function.

responseJSON(w, ClassifyResult{
  Filename: header.Filename,
  Labels:   findBestLabels(output[0].Value().([][]float32)[0]),
})

Rebuild the container.

docker-compose -f docker-compose.yaml up -d --build

Try calling the localhost:8080/recognize endpoint with a couple of images.

curl localhost:8080/recognize -F 'image=@./cat.jpg'
{
  "filename": "cat.jpg",
  "labels": [
    { "label": "Egyptian cat", "probability": 0.39229771 },
    { "label": "weasel", "probability": 0.19872947 },
    { "label": "Arctic fox", "probability": 0.14527217 },
    { "label": "tabby", "probability": 0.062454574 },
    { "label": "kit fox", "probability": 0.043656528 }
  ]
}

Wrapping up

You now have a fully working image recognition service. If you want it to focus on a more niche domain (use specific labels instead of general ones), look into transfer learning here. You can find more TensorFlow models and examples and try to implement them in Go.

Full source code is available on GitHub.

TensorFlow

Build an Image Recognition API with Go and TensorFlow

Getting started

Configure container

Loading TensorFlow model

Uploading images

Normalizing images

Running inference

Finding best labels

Wrapping up

Newsletter

Get awesome articles delivered right to your doorstep

Related

How to Keep Your Development Environment Clean

Using GraphQL with Microservices in Go

How to Build a Search Service with Go and Elasticsearch