Remove device.advertising_id across all tables from a BigQuery Firebase export using the BQ command line tool
Thu, 14 February 2019
remove_advertising_id.sh
#!/usr/bin/env bash DATASET_NAME="<DATASET NAME HERE>" PROJECT_ID="<PROJECT ID HERE>" # Don't touch below. BUCKET_BASE_PATH="gs://${BUCKET_NAME}/" TABLE_LIST_FILE_NAME="/tmp/bq-extract-${DATASET_NAME}.txt" rm ${TABLE_LIST_FILE_NAME} | grep "Added for failure suppression" echo "Starting: Reading tables from ${DATASET_NAME}" EXPORTED=$(bq ls -a -n 1000 ${DATASET_NAME} > ${TABLE_LIST_FILE_NAME}) echo "Finished: Reading tables from ${DATASET_NAME}" trim() { local trimmed="$1" while [[ $trimmed == ' '* ]]; do trimmed="${trimmed## }" done while [[ $trimmed == *' ' ]]; do trimmed="${trimmed%% }" done echo "$trimmed" } EMPTY_SPACE="" while IFS='' read -r SINGLE_LINE || [[ -n "$SINGLE_LINE" ]]; do if [[ "$SINGLE_LINE" == *"events_"* ]]; then if [[ "$SINGLE_LINE" != *"intraday_"* ]]; then SINGLE_LINE=${SINGLE_LINE//TABLE/$EMPTY_SPACE} SINGLE_TABLE=$(trim "${SINGLE_LINE}") echo "Starting: Table ${SINGLE_TABLE}" COMMAND="bq extract --destination_format NEWLINE_DELIMITED_JSON --compression GZIP "${DATASET_NAME}.${SINGLE_TABLE}" ${BUCKET_BASE_PATH}${SINGLE_TABLE}_*.json.gzip" $(${COMMAND}) echo "Finished: Table ${SINGLE_TABLE}" fi fi done < "${TABLE_LIST_FILE_NAME}" rm ${TABLE_LIST_FILE_NAME}