Remove device.advertising_id across all tables from a BigQuery Firebase export using the BQ command line tool

Thu, 14 February 2019

remove_advertising_id.sh
#!/usr/bin/env bash

DATASET_NAME="<DATASET NAME HERE>"
PROJECT_ID="<PROJECT ID HERE>"

# Don't touch below.
BUCKET_BASE_PATH="gs://${BUCKET_NAME}/"
TABLE_LIST_FILE_NAME="/tmp/bq-extract-${DATASET_NAME}.txt"
rm ${TABLE_LIST_FILE_NAME} | grep "Added for failure suppression"

echo "Starting: Reading tables from ${DATASET_NAME}"
EXPORTED=$(bq ls -a -n 1000 ${DATASET_NAME} > ${TABLE_LIST_FILE_NAME})
echo "Finished: Reading tables from ${DATASET_NAME}"

trim() {
    local trimmed="$1"
    while [[ $trimmed == ' '* ]]; do
       trimmed="${trimmed## }"
    done
    while [[ $trimmed == *' ' ]]; do
        trimmed="${trimmed%% }"
    done
    echo "$trimmed"
}

EMPTY_SPACE=""
while IFS='' read -r SINGLE_LINE || [[ -n "$SINGLE_LINE" ]]; do
  if [[ "$SINGLE_LINE" == *"events_"* ]]; then
    if [[ "$SINGLE_LINE" != *"intraday_"* ]]; then
      SINGLE_LINE=${SINGLE_LINE//TABLE/$EMPTY_SPACE}
      SINGLE_TABLE=$(trim "${SINGLE_LINE}")
      echo "Starting: Table ${SINGLE_TABLE}"
      COMMAND="bq extract --destination_format NEWLINE_DELIMITED_JSON --compression GZIP "${DATASET_NAME}.${SINGLE_TABLE}" ${BUCKET_BASE_PATH}${SINGLE_TABLE}_*.json.gzip"
      $(${COMMAND})
      echo "Finished: Table ${SINGLE_TABLE}"
    fi
  fi
done < "${TABLE_LIST_FILE_NAME}"

rm ${TABLE_LIST_FILE_NAME}