Articles

Provisioning Azure Databricks and PAT tokens with Terraform

See Part 1, Using Azure AD With The Azure Databricks API, for a background on the Azure AD authentication mechanism for Databricks.

Here we show how to bootstrap the provisioning of an Azure Databricks workspace and generate a PAT Token that can be used by downstream applications.

Create a script generate-pat-token.sh with the following content, and mark it executable (chmod +x generate-pat-token.sh), since Terraform will invoke it directly from a local-exec provisioner.

#!/bin/bash

# Generates a Databricks PAT token for the given workspace and stores it
# as a secret in Azure Key Vault.
#
# Bash strict mode: exit on any error, on use of an unset variable, and
# on failures anywhere in a pipeline. (Requires bash: "pipefail" and the
# <<< herestrings below are not POSIX sh, so the shebang must be bash.)
set -euo pipefail

# Ensure all required environment variables are present and non-empty.
# (Under "set -u" an unset variable already aborts; "test -n" additionally
# rejects empty values.)
test -n "$DATABRICKS_ENDPOINT"
test -n "$DATABRICKS_WORKSPACE_RESOURCE_ID"
test -n "$KEY_VAULT"
test -n "$SECRET_NAME"
test -n "$ARM_CLIENT_ID"
test -n "$ARM_CLIENT_SECRET"
test -n "$ARM_TENANT_ID"

# Login as the service principal supplied via the ARM_* variables.
az login --service-principal -u "$ARM_CLIENT_ID" -p "$ARM_CLIENT_SECRET" -t "$ARM_TENANT_ID"

# Get a token for the global Databricks application.
# The resource GUID is fixed and never changes.
token_response=$(az account get-access-token --resource 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d)
token=$(jq .accessToken -r <<< "$token_response")

# Get a token for the Azure management API.
token_response=$(az account get-access-token --resource https://management.core.windows.net/)
azToken=$(jq .accessToken -r <<< "$token_response")

# Generate a PAT token. Note the quota limit of 600 tokens.
# The URL is quoted to prevent word-splitting; "curl -f" turns HTTP error
# responses into a non-zero exit so strict mode aborts the script.
api_response=$(curl -sf "$DATABRICKS_ENDPOINT/api/2.0/token/create" \
  -H "Authorization: Bearer $token" \
  -H "X-Databricks-Azure-SP-Management-Token:$azToken" \
  -H "X-Databricks-Azure-Workspace-Resource-Id:$DATABRICKS_WORKSPACE_RESOURCE_ID" \
  -d '{ "comment": "Terraform-generated token" }')
pat_token=$(jq .token_value -r <<< "$api_response")

# Store the generated PAT token in Key Vault for downstream consumers.
az keyvault secret set --vault-name "$KEY_VAULT" -n "$SECRET_NAME" --value "$pat_token"

Here is a sample Terraform configuration that will create an Azure Key Vault and a Databricks workspace, generate a Databricks PAT token and store it in the Key Vault. You can connect the Key Vault to an Azure Data Factory, or read the token from another script.

# Configure the Azure Provider.
provider "azurerm" {
  # Pin to an exact provider version so "terraform init" is reproducible
  # and provider upgrades are an explicit, reviewed change.
  version = "=1.38.0"
}

# The null provider supplies the null_resource used below to run the
# PAT-generation script through a local-exec provisioner.
provider "null" {
  version = "~> 2.1"
}

# Identity Terraform is currently running as; its object_id is used below
# to grant this principal write access to the Key Vault.
data "azurerm_client_config" "current" {}

# The Azure Databricks workspace for which a PAT token will be generated.
resource "azurerm_databricks_workspace" "databricks" {
  name                = "dbricks${var.appname}${var.environment}"
  resource_group_name = var.resource_group_name
  location            = var.location
  sku                 = "standard"
}

# Look up the deployed workspace resource so its location can be used to
# build the regional Databricks API endpoint URL (see DATABRICKS_ENDPOINT
# in the null_resource below).
data "azurerm_resources" "databricks" {
  resource_group_name = var.resource_group_name
  type = "Microsoft.Databricks/workspaces"
  name = azurerm_databricks_workspace.databricks.name
}

# Key Vault in which the generated Databricks PAT token is stored, so that
# downstream consumers (e.g. Azure Data Factory) can read it.
resource "azurerm_key_vault" "databricks_token" {
  name                = "kv-${var.appname}-${var.environment}"
  location            = var.location
  resource_group_name = var.resource_group_name
  tenant_id           = var.tenant_id

  sku_name = "standard"

  # NOTE(review): default_action = "Allow" leaves the vault reachable from
  # any network. The local-exec script must be able to reach the vault from
  # the machine running Terraform, so tightening this to "Deny" requires
  # adding that machine's network to the ACL — confirm before changing.
  network_acls {
    default_action = "Allow"
    bypass         = "None"
  }
}

# Grant the principal running Terraform permission to write secrets, which
# is the only operation the PAT-generation script performs against the
# vault ("az keyvault secret set").
resource "azurerm_key_vault_access_policy" "terraform" {
  key_vault_id = azurerm_key_vault.databricks_token.id

  tenant_id = azurerm_key_vault.databricks_token.tenant_id
  object_id = data.azurerm_client_config.current.object_id

  # "set" only: the script writes the secret but never reads or lists.
  secret_permissions = [
    "set",
  ]
}

# Run the PAT-generation script once the workspace and the Key Vault
# access policy both exist.
resource "null_resource" "databricks_token" {
  # Re-run the script only when the workspace or the access policy is
  # replaced. NOTE(review): the script will NOT re-run when a previously
  # generated token expires — confirm this is acceptable for your setup.
  triggers = {
    workspace = azurerm_databricks_workspace.databricks.id
    key_vault_access = azurerm_key_vault_access_policy.terraform.id
  }
  provisioner "local-exec" {
    command = "${path.module}/generate-pat-token.sh"
    environment = {
      # NOTE(review): RESOURCE_GROUP is exported here but the script shown
      # in this article never references it — presumably kept for future
      # use; verify before removing.
      RESOURCE_GROUP = var.resource_group_name
      DATABRICKS_WORKSPACE_RESOURCE_ID = azurerm_databricks_workspace.databricks.id
      KEY_VAULT = azurerm_key_vault.databricks_token.name
      SECRET_NAME = "DATABRICKS-TOKEN"
      # Regional Databricks API endpoint, derived from the workspace's
      # deployed location.
      DATABRICKS_ENDPOINT = "https://${data.azurerm_resources.databricks.resources[0].location}.azuredatabricks.net"
      # ARM_CLIENT_ID, ARM_CLIENT_SECRET, ARM_TENANT_ID are already 
      # present in the environment if you are using the Terraform
      # extension for Azure DevOps or the starter from 
      # https://github.com/algattik/terraform-azure-pipelines-starter.
      # Otherwise, provide them as additional variables.
    }
  }
}

Alexandre Gattiker
Software Engineer at Microsoft, Data & AI, open source fan
https://cloudarchitected.com

Leave a Reply

Your email address will not be published. Required fields are marked *