See Part 1, Using Azure AD With The Azure Databricks API, for a background on the Azure AD authentication mechanism for Databricks.
Here we show how to bootstrap the provisioning of an Azure Databricks workspace and generate a PAT Token that can be used by downstream applications.
Create a script named `generate-pat-token.sh` with the following content.
#!/usr/bin/env bash
# Generate a Databricks PAT token for an Azure Databricks workspace using
# Azure AD service-principal credentials, and store it in an Azure Key Vault
# secret for use by downstream applications (e.g. Azure Data Factory).
#
# Required environment variables:
#   DATABRICKS_WORKSPACE_RESOURCE_ID  ARM resource ID of the workspace
#   DATABRICKS_ENDPOINT               regional endpoint, e.g. https://westeurope.azuredatabricks.net
#   KEY_VAULT                         name of the Key Vault to write to
#   SECRET_NAME                       name of the secret to create/update
#   ARM_CLIENT_ID                     service principal application ID
#   ARM_CLIENT_SECRET                 service principal secret
#   ARM_TENANT_ID                     Azure AD tenant ID

# Bash strict mode, stop on any error
# (pipefail and the <<< here-strings below are bashisms, hence the bash shebang)
set -euo pipefail

# Ensure all required environment variables are present
test -n "$DATABRICKS_WORKSPACE_RESOURCE_ID"
test -n "$DATABRICKS_ENDPOINT"
test -n "$KEY_VAULT"
test -n "$SECRET_NAME"
test -n "$ARM_CLIENT_ID"
test -n "$ARM_CLIENT_SECRET"
test -n "$ARM_TENANT_ID"

# Login.
# NOTE(review): passing the secret with -p exposes it in the process list;
# consider `az login` with a certificate or managed identity where possible.
az login --service-principal -u "$ARM_CLIENT_ID" -p "$ARM_CLIENT_SECRET" -t "$ARM_TENANT_ID"

# Get a token for the global Databricks application.
# The resource GUID is fixed and never changes.
token_response=$(az account get-access-token --resource 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d)
token=$(jq .accessToken -r <<< "$token_response")

# Get a token for the Azure management API
token_response=$(az account get-access-token --resource https://management.core.windows.net/)
azToken=$(jq .accessToken -r <<< "$token_response")

# Generate a PAT token. Note the quota limit of 600 tokens per workspace.
api_response=$(curl -sf "$DATABRICKS_ENDPOINT/api/2.0/token/create" \
  -H "Authorization: Bearer $token" \
  -H "X-Databricks-Azure-SP-Management-Token:$azToken" \
  -H "X-Databricks-Azure-Workspace-Resource-Id:$DATABRICKS_WORKSPACE_RESOURCE_ID" \
  -d '{ "comment": "Terraform-generated token" }')
pat_token=$(jq .token_value -r <<< "$api_response")

# Store the PAT token in the Key Vault
az keyvault secret set --vault-name "$KEY_VAULT" -n "$SECRET_NAME" --value "$pat_token"
Here is a sample Terraform configuration that will create an Azure Key Vault and a Databricks workspace, generate a Databricks PAT token and store it in the Key Vault. You can connect the Key Vault to an Azure Data Factory, or read the token from another script.
# Configure the Azure Provider
provider "azurerm" {
  # It is recommended to pin to a given version of the Provider
  version = "=1.38.0"
}

provider "null" {
  version = "~> 2.1"
}

# Identity running Terraform; used to grant Key Vault access below.
data "azurerm_client_config" "current" {}

# The Databricks workspace whose PAT token we will generate.
resource "azurerm_databricks_workspace" "databricks" {
  name                = "dbricks${var.appname}${var.environment}"
  resource_group_name = var.resource_group_name
  location            = var.location
  sku                 = "standard"
}

# Look the workspace back up as a generic resource, to read its location
# for building the regional Databricks API endpoint URL.
data "azurerm_resources" "databricks" {
  resource_group_name = var.resource_group_name
  type                = "Microsoft.Databricks/workspaces"
  name                = azurerm_databricks_workspace.databricks.name
}

# Key Vault that will hold the generated PAT token.
resource "azurerm_key_vault" "databricks_token" {
  name                = "kv-${var.appname}-${var.environment}"
  location            = var.location
  resource_group_name = var.resource_group_name
  tenant_id           = var.tenant_id
  sku_name            = "standard"

  network_acls {
    default_action = "Allow"
    bypass         = "None"
  }
}

# Allow the identity running Terraform to write secrets into the vault.
resource "azurerm_key_vault_access_policy" "terraform" {
  key_vault_id = azurerm_key_vault.databricks_token.id
  tenant_id    = azurerm_key_vault.databricks_token.tenant_id
  object_id    = data.azurerm_client_config.current.object_id

  secret_permissions = [
    "set",
  ]
}

# Run the PAT-generation script once the workspace and vault access exist.
# Re-runs whenever the workspace or the access policy changes.
resource "null_resource" "databricks_token" {
  triggers = {
    workspace        = azurerm_databricks_workspace.databricks.id
    key_vault_access = azurerm_key_vault_access_policy.terraform.id
  }

  provisioner "local-exec" {
    command = "${path.module}/generate-pat-token.sh"

    environment = {
      RESOURCE_GROUP                   = var.resource_group_name
      DATABRICKS_WORKSPACE_RESOURCE_ID = azurerm_databricks_workspace.databricks.id
      KEY_VAULT                        = azurerm_key_vault.databricks_token.name
      SECRET_NAME                      = "DATABRICKS-TOKEN"
      DATABRICKS_ENDPOINT              = "https://${data.azurerm_resources.databricks.resources[0].location}.azuredatabricks.net"

      # ARM_CLIENT_ID, ARM_CLIENT_SECRET, ARM_TENANT_ID are already
      # present in the environment if you are using the Terraform
      # extension for Azure DevOps or the starter from
      # https://github.com/algattik/terraform-azure-pipelines-starter.
      # Otherwise, provide them as additional variables.
    }
  }
}