Gemini file upload in Elixir

If you want to send videos or images to the Google Gemini API, e.g., for video summarization, you need to upload the file to a separate endpoint and wait for it to be processed.

This is described in the Gemini docs, and it's fairly straightforward to translate into Elixir, but in case you just want to grab the code (and save a few minutes of prompting an LLM :P), here is what I used:

defmodule PodClipper.Gemini do
  @moduledoc """
  Minimal client for uploading media files to the Google Gemini Files API.

  Uploading is a two-request resumable upload: the first request announces the
  file and yields an upload URL in the `x-goog-upload-url` response header, the
  second request sends the bytes to that URL. After uploading, poll with
  `wait_for_processing/2` until the file reaches the `"ACTIVE"` state.

  The API key is read from the `GEMINI_API_KEY` environment variable on every
  request.
  """

  require Logger

  @base_url "https://generativelanguage.googleapis.com"

  @doc """
  Uploads the file at `video_path` and returns the URI Gemini assigned to it.

  The MIME type is detected by shelling out to the system `file` command, and
  the file's basename is used as its display name. The whole file is read into
  memory before it is sent.

  Returns `{:ok, file_uri}` on success, or `{:error, message}` when the upload
  URL or the file URI is missing from the API responses. Raises if the file
  cannot be stat'ed or read, if `file` exits with a non-zero status, or if one
  of the bang `Req` calls raises.
  """
  @spec upload_video(Path.t()) :: {:ok, String.t()} | {:error, String.t()}
  def upload_video(video_path) do
    mime_type = get_mime_type(video_path)
    num_bytes = File.stat!(video_path).size
    display_name = Path.basename(video_path)

    with {:ok, upload_url} <- start_resumable_upload(display_name, mime_type, num_bytes) do
      upload_bytes(upload_url, video_path, num_bytes)
    end
  end

  @doc """
  Polls the file at `file_uri` until it leaves the `"PROCESSING"` state.

  Sleeps `poll_interval_ms` milliseconds between polls (default `1000`). There
  is no upper bound on the number of polls: the function keeps polling for as
  long as the API reports `"PROCESSING"`.

  Returns `{:ok, file_uri}` once the state is `"ACTIVE"`, and
  `{:error, message}` for any other state, including a response that carries
  no state at all.
  """
  @spec wait_for_processing(String.t(), non_neg_integer()) ::
          {:ok, String.t()} | {:error, String.t()}
  def wait_for_processing(file_uri, poll_interval_ms \\ 1000) do
    case get_file_state(file_uri) do
      "PROCESSING" ->
        Logger.info("Processing video...")
        Process.sleep(poll_interval_ms)
        # Carry the caller's interval forward; recursing through the 1-arity
        # form would silently reset it to the 1000 ms default after one poll.
        wait_for_processing(file_uri, poll_interval_ms)

      "ACTIVE" ->
        {:ok, file_uri}

      state ->
        {:error, "Unexpected file state: #{state}"}
    end
  end

  # Step 1 of the resumable upload: announce the file's size, type and display
  # name, and extract the upload URL from the response headers.
  defp start_resumable_upload(display_name, mime_type, num_bytes) do
    response =
      Req.post!(
        "#{@base_url}/upload/v1beta/files",
        params: [key: get_api_key()],
        headers: [
          {"x-goog-upload-protocol", "resumable"},
          {"x-goog-upload-command", "start"},
          {"x-goog-upload-header-content-length", "#{num_bytes}"},
          {"x-goog-upload-header-content-type", mime_type},
          {"content-type", "application/json"}
        ],
        json: %{
          file: %{
            display_name: display_name
          }
        }
      )

    get_upload_url(response)
  end

  # Step 2 of the resumable upload: send all bytes in a single request that
  # also finalizes the upload, then pull the file URI out of the JSON body.
  defp upload_bytes(upload_url, video_path, num_bytes) do
    response =
      Req.post!(upload_url,
        headers: [
          {"content-length", "#{num_bytes}"},
          {"x-goog-upload-offset", "0"},
          {"x-goog-upload-command", "upload, finalize"}
        ],
        body: File.read!(video_path)
      )

    case response.body do
      %{"file" => %{"uri" => file_uri}} -> {:ok, file_uri}
      _ -> {:error, "Failed to get file URI from response"}
    end
  end

  # Fetches the file resource and returns its "state" field, or nil when the
  # body has no such field or is not a decoded JSON map (so the caller reports
  # an unexpected state instead of crashing with a BadMapError).
  defp get_file_state(file_uri) do
    response = Req.get!(file_uri, params: [key: get_api_key()])

    case response.body do
      %{"state" => state} -> state
      _ -> nil
    end
  end

  # Reads the upload URL from the "x-goog-upload-url" response header.
  defp get_upload_url(%Req.Response{} = response) do
    case Req.Response.get_header(response, "x-goog-upload-url") do
      [url | _] -> {:ok, url}
      e -> {:error, "Failed to get upload URL from response. #{inspect(e)}"}
    end
  end

  # Detects the MIME type via the system `file` command; the assertive match on
  # exit status 0 raises a MatchError if the command fails.
  defp get_mime_type(file_path) do
    {mime_type, 0} = System.cmd("file", ["-b", "--mime-type", file_path])
    String.trim(mime_type)
  end

  # Returns the API key from the environment, or nil when it is not set.
  defp get_api_key do
    System.get_env("GEMINI_API_KEY")
  end
end

Usage

# Upload the local file, then block until Gemini reports it as ready to use.
video_path = "/Users/samrat/Downloads/348f603d-3a7b-4975-bf98-76fe7df03be9.mp4"
{:ok, uri} = PodClipper.Gemini.upload_video(video_path)

PodClipper.Gemini.wait_for_processing(uri)

Once I upload the image/video, I use it in an Instructor call, but you could also call the Gemini `generateContent` endpoint directly.

  @doc """
  Uploads the file at `video_path`, waits for it to finish processing, and
  asks Gemini to describe it.

  Returns the result of the content-generation step when both earlier steps
  return `{:ok, file_uri}`. Otherwise returns the first non-`{:ok, _}` result
  unchanged.
  """
  def describe_video(video_path) do
    case upload_video(video_path) do
      {:ok, uploaded_uri} ->
        case wait_for_processing(uploaded_uri) do
          {:ok, active_uri} -> generate_content(active_uri)
          not_ready -> not_ready
        end

      upload_failure ->
        upload_failure
    end
  end

  # Asks the gemini-1.5-flash model to describe the uploaded file at `file_uri`.
  #
  # `mime_type` is sent alongside the file reference and defaults to
  # "video/mp4", so existing one-argument callers behave as before; pass e.g.
  # "image/png" for an uploaded image.
  #
  # Returns `{:ok, text}` built from the first candidate's text parts
  # (concatenated in order when there are several), or
  # `{:error, "Failed to parse response"}` when the body has no candidate with
  # at least one text part.
  defp generate_content(file_uri, mime_type \\ "video/mp4") do
    response =
      Req.post!(
        "#{@base_url}/v1beta/models/gemini-1.5-flash:generateContent",
        params: [key: get_api_key()],
        json: %{
          contents: [
            %{
              parts: [
                %{text: "Describe this video clip"},
                %{file_data: %{mime_type: mime_type, file_uri: file_uri}}
              ]
            }
          ]
        }
      )

    case response.body do
      %{"candidates" => [%{"content" => %{"parts" => parts}} | _]} when is_list(parts) ->
        join_text_parts(parts)

      _ ->
        {:error, "Failed to parse response"}
    end
  end

  # Concatenates the "text" fields of `parts`, skipping parts without text.
  defp join_text_parts(parts) do
    case for(%{"text" => text} <- parts, is_binary(text), do: text) do
      [] -> {:error, "Failed to parse response"}
      texts -> {:ok, Enum.join(texts)}
    end
  end