#!/bin/sh
# Copyright 2023 Criticality Score Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds the local URL data file named in the config: reads every file under
# the latest prefix of the input bucket, keeps the first CSV column of each
# (the repo), de-duplicates and shuffles the combined list, then moves it into
# place in a single step.
#
# Usage: init.sh CONFIG_FILE
#
# Test usage (from the base dir of the repo):
#   docker build . -f containers/init_collect_signals/Dockerfile -t criticality_score_init_collection
#   docker run \
#       -v /tmp:/output \
#       -v $HOME/.config/gcloud:/root/.config/gcloud \
#       -v $HOME/path/to/config.yaml:/etc/config.yaml \
#       -ti criticality_score_init_collection \
#       /bin/init.sh /etc/config.yaml

# Abort on any failed command or unset variable so that a partial or empty
# list is never installed as the output file. (pipefail is deliberately not
# used: it is not available in every /bin/sh.)
set -eu

# Prints a message to stderr and exits with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints the value found at yq path $1 in CONFIG_FILE.
# Fails if yq fails, or if the key is absent (yq -r prints "null") or empty.
# Intended to be called inside $(...), so "value" stays in that subshell.
read_setting() {
  value=$(yq -r "$1" "$CONFIG_FILE") || die "failed to read $1 from $CONFIG_FILE"
  case "$value" in
    '' | null) die "missing setting $1 in $CONFIG_FILE" ;;
  esac
  printf '%s\n' "$value"
}

[ "$#" -ge 1 ] || die "usage: $0 CONFIG_FILE"
CONFIG_FILE="$1"
[ -r "$CONFIG_FILE" ] || die "cannot read config file: $CONFIG_FILE"

# Read the appropriate settings from the YAML config file.
BUCKET_URL=$(read_setting '."additional-params"."input-bucket".url')
BUCKET_PREFIX_FILE=$(read_setting '."additional-params"."input-bucket"."prefix-file"')
OUTPUT_FILE=$(read_setting '."additional-params".criticality."local-url-data-file"')
echo "bucket url = $BUCKET_URL"
echo "bucket prefix file = $BUCKET_PREFIX_FILE"
echo "url data file = $OUTPUT_FILE"

# The prefix file holds the name of the most recent prefix in the bucket.
LATEST_PREFIX=$(gsutil cat "$BUCKET_URL/$BUCKET_PREFIX_FILE")
[ -n "$LATEST_PREFIX" ] || die "prefix file $BUCKET_URL/$BUCKET_PREFIX_FILE is empty"
echo "latest prefix = $LATEST_PREFIX"

# Define some temporary files based on OUTPUT_FILE so they're on the same
# volume (required for the final mv to be a rename rather than a copy).
TMP_OUTPUT_FILE_1="$OUTPUT_FILE-tmp-1"
TMP_OUTPUT_FILE_2="$OUTPUT_FILE-tmp-2"
TMP_SHARD_FILE="$OUTPUT_FILE-tmp-shard"

# Remove the temporary files on every exit path, including failures.
cleanup() {
  rm -f -- "$TMP_OUTPUT_FILE_1" "$TMP_OUTPUT_FILE_2" "$TMP_SHARD_FILE"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Start from an empty file; truncating (rather than touch) guarantees that
# leftovers from an earlier interrupted run are not merged into this one.
: > "$TMP_OUTPUT_FILE_1"

# Capture the listing first so a failing "gsutil ls" aborts the script.
FILES=$(gsutil ls "$BUCKET_URL/$LATEST_PREFIX")

# Iterate through all the files to merge all together. Reading line by line
# avoids word-splitting and glob expansion of the object URLs.
printf '%s\n' "$FILES" | while IFS= read -r file; do
  [ -n "$file" ] || continue
  echo "reading $file"
  # Download to a scratch file first so a failed download is detected
  # instead of being hidden behind the tail/cut pipeline. stdin is redirected
  # so gsutil cannot consume the list of files being read by the loop.
  gsutil cat "$file" > "$TMP_SHARD_FILE" < /dev/null
  # Remove the header and turn the file into a plain list of repos.
  tail -n +2 "$TMP_SHARD_FILE" | cut -d',' -f1 >> "$TMP_OUTPUT_FILE_1"
done

# Ensure the file contains only one entry per repo, and shuffle it.
# LC_ALL=C makes de-duplication byte-exact; sort order is irrelevant because
# the list is shuffled afterwards.
LC_ALL=C sort -u -o "$TMP_OUTPUT_FILE_1" "$TMP_OUTPUT_FILE_1"
shuf "$TMP_OUTPUT_FILE_1" > "$TMP_OUTPUT_FILE_2"

# Move the final tmp file to the output file to ensure the change is atomic.
mv -- "$TMP_OUTPUT_FILE_2" "$OUTPUT_FILE"

echo "wrote $OUTPUT_FILE"