Skip to content

Commit

Permalink
splat-1844: add step to report iam used by job
Browse files Browse the repository at this point in the history
  • Loading branch information
mtulio committed Nov 11, 2024
1 parent 3b31b75 commit f2789fa
Show file tree
Hide file tree
Showing 13 changed files with 400 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1858,6 +1858,17 @@ tests:
cluster_profile: aws-perfscale-qe
workflow: rosa-aws-sts-hcp-conformance
timeout: 6h0m0s
- as: e2e-aws-ovn-perms-discovery
cron: '@weekly'
steps:
cluster_profile: aws
env:
AWS_INSTALL_USE_MINIMAL_PERMISSIONS: "yes"
AWS_INSTALL_USE_CUSTOM_IDENTITY: "yes"
SKIP_CREATE_POLICY: "yes"
POLICY_ARN: "arn:aws:iam::aws:policy/AdministratorAccess"
workflow: openshift-e2e-aws
timeout: 6h0m0s
zz_generated_metadata:
branch: master
org: openshift
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105462,6 +105462,83 @@ periodics:
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build10
cron: '@weekly'
decorate: true
decoration_config:
skip_cloning: true
timeout: 6h0m0s
extra_refs:
- base_ref: master
org: openshift
repo: release
labels:
ci-operator.openshift.io/cloud: aws
ci-operator.openshift.io/cloud-cluster-profile: aws
ci-operator.openshift.io/variant: nightly-4.18
ci.openshift.io/generator: prowgen
ci.openshift.io/no-builds: "true"
job-release: "4.18"
pj-rehearse.openshift.io/can-be-rehearsed: "true"
name: periodic-ci-openshift-release-master-nightly-4.18-e2e-aws-ovn-perms-discovery
spec:
containers:
- args:
- --gcs-upload-secret=/secrets/gcs/service-account.json
- --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
- --lease-server-credentials-file=/etc/boskos/credentials
- --report-credentials-file=/etc/report/credentials
- --secret-dir=/secrets/ci-pull-credentials
- --target=e2e-aws-ovn-perms-discovery
- --variant=nightly-4.18
command:
- ci-operator
image: ci-operator:latest
imagePullPolicy: Always
name: ""
resources:
requests:
cpu: 10m
volumeMounts:
- mountPath: /etc/boskos
name: boskos
readOnly: true
- mountPath: /secrets/ci-pull-credentials
name: ci-pull-credentials
readOnly: true
- mountPath: /secrets/gcs
name: gcs-credentials
readOnly: true
- mountPath: /secrets/manifest-tool
name: manifest-tool-local-pusher
readOnly: true
- mountPath: /etc/pull-secret
name: pull-secret
readOnly: true
- mountPath: /etc/report
name: result-aggregator
readOnly: true
serviceAccountName: ci-operator
volumes:
- name: boskos
secret:
items:
- key: credentials
path: credentials
secretName: boskos-credentials
- name: ci-pull-credentials
secret:
secretName: ci-pull-credentials
- name: manifest-tool-local-pusher
secret:
secretName: manifest-tool-local-pusher
- name: pull-secret
secret:
secretName: registry-pull-credentials
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build10
cron: 0 0 */2 * *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,32 @@ PERMISSIONS_POLICY_FILENAME="aws-permissions-policy-creds.json"
USER_POLICY_FILE="${SHARED_DIR}/${PERMISSIONS_POLICY_FILENAME}"
USER_CREDENTIALS_OUTPUT_FILENAME="aws_minimal_permission"

if [ ! -f ${USER_POLICY_FILE} ]; then
if [[ "${SKIP_CREATE_POLICY-}" == "yes" ]]; then
if [ -z "${POLICY_ARN-}" ]; then
echo "POLICY_ARN must be set when SKIP_CREATE_POLICY is set"
exit 1
fi
elif [ ! -f "${USER_POLICY_FILE}" ]; then
echo "User permission policy file not found. Skipping user creation"
exit 0
else
echo "Policy file:"
jq . "${USER_POLICY_FILE}"

POLICY_NAME="${CLUSTER_NAME}-required-policy"
POLICY_DOC=$(cat "${USER_POLICY_FILE}" | jq -c .)
POLICY_OUTOUT=/tmp/aws_policy_output
echo "Creating policy ${POLICY_NAME}"
aws_create_policy "${REGION}" "${POLICY_NAME}" "${POLICY_DOC}" "${POLICY_OUTOUT}"
fi

echo "Policy file:"
jq . $USER_POLICY_FILE

POLICY_NAME="${CLUSTER_NAME}-required-policy"
POLICY_DOC=$(cat "${USER_POLICY_FILE}" | jq -c .)
POLICY_OUTOUT=/tmp/aws_policy_output

echo "Creating policy ${POLICY_NAME}"
aws_create_policy $REGION "${POLICY_NAME}" "${POLICY_DOC}" "${POLICY_OUTOUT}"

USER_NAME="${CLUSTER_NAME}-minimal-perm"
POLICY_ARN=$(jq -r '.Policy.Arn' ${POLICY_OUTOUT})
USER_OUTOUT=/tmp/aws_user_output
CRED_OUTOUT=/tmp/aws_cred_output

# Register the user creation time so audit logs can be collected starting from it
USER_CREATED_TIMESTAMP="$(date -u "+%Y-%m-%dT%H:%M:%S+00:00")"

echo "Creating user ${USER_NAME}"
aws_create_user $REGION "${USER_NAME}" "${POLICY_ARN}" "${USER_OUTOUT}" "${CRED_OUTOUT}"

Expand All @@ -95,3 +101,8 @@ EOF
# for destroy
echo ${POLICY_ARN} >"${SHARED_DIR}/aws_policy_arns"
echo ${USER_NAME} >"${SHARED_DIR}/aws_user_names"

# used by IAM event parser
echo "${USER_CREATED_TIMESTAMP}" > "${SHARED_DIR}"/time_iam_created
echo "${CLUSTER_NAME}" > "${SHARED_DIR}"/CLUSTER_NAME
echo "${LEASED_RESOURCE}" > "${SHARED_DIR}"/LEASED_RESOURCE
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,15 @@ ref:
requests:
cpu: 10m
memory: 100Mi
env:
- name: SKIP_CREATE_POLICY
default: ""
documentation: |-
Set to "yes" to skip the policy creation. It requires the variable POLICY_ARN to be set.
- name: POLICY_ARN
default: ""
documentation: |-
Defines the ARN of the policy to use when SKIP_CREATE_POLICY is set to "yes", so the
step uses a policy provided at the job level instead of creating one.
documentation: |-
Create an AWS IAM user with the permissions from the aws-permissions-policy-creds.json file.
4 changes: 4 additions & 0 deletions ci-operator/step-registry/gather/cloud-iam-access/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
approvers:
- r4f4
- mtulio
- wking
4 changes: 4 additions & 0 deletions ci-operator/step-registry/gather/cloud-iam-access/aws/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
approvers:
- r4f4
- mtulio
- wking
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi' TERM

# Events are collected only when the job runs with a custom IAM user; in any
# other case the step finishes successfully without doing anything.
case "${AWS_INSTALL_USE_CUSTOM_IDENTITY-}" in
  yes) ;;
  *)
    echo "Skipping step as custom IAM identity is not enabled. AWS_INSTALL_USE_CUSTOM_IDENTITY=[${AWS_INSTALL_USE_CUSTOM_IDENTITY-}]"
    exit 0
    ;;
esac

# From here on the custom identity is expected to exist: fail when the file
# holding the custom user name is missing.
[[ -f "${SHARED_DIR}/aws_user_names" ]] || {
  echo "Flag AWS_INSTALL_USE_CUSTOM_IDENTITY is enabled but no custom user name has been found. Check if step aws-provision-iam-user has been succeeded."
  exit 1
}

# Fail as well when the control file holding the user creation timestamp is
# missing.
[[ -f "${SHARED_DIR}/time_iam_created" ]] || {
  echo "Unable to find timestamp that custom IAM user has been created. Check if step aws-provision-iam-user has been succeeded."
  exit 1
}

# Resolve CLUSTER_NAME: keep the value from the environment when present,
# otherwise read it from the shared directory. The IAM user provisioning step
# saves it as "CLUSTER_NAME" (upper case); the lower-case "cluster_name" is
# kept as a fallback for backward compatibility.
if [[ -z "${CLUSTER_NAME-}" ]]; then
  CLUSTER_NAME=""
  for name_file in "${SHARED_DIR}/CLUSTER_NAME" "${SHARED_DIR}/cluster_name"; do
    if [[ -r "${name_file}" ]]; then
      CLUSTER_NAME="$(<"${name_file}")"
    fi
    if [[ -n "${CLUSTER_NAME}" ]]; then
      break
    fi
  done
  if [[ -z "${CLUSTER_NAME}" ]]; then
    echo "Unable to locate CLUSTER_NAME"
    exit 1
  fi
  # Exported separately: `export VAR=$(...)` would hide a failing substitution.
  export CLUSTER_NAME
fi

# Resolve LEASED_RESOURCE the same way. The file is read only when it is
# readable, instead of `$(<file || true)`: the `$(<file)` shortcut is only
# defined for a substitution containing nothing but the redirection.
if [[ -z "${LEASED_RESOURCE-}" ]]; then
  LEASED_RESOURCE=""
  if [[ -r "${SHARED_DIR}/LEASED_RESOURCE" ]]; then
    LEASED_RESOURCE="$(<"${SHARED_DIR}/LEASED_RESOURCE")"
  fi
  if [[ -z "${LEASED_RESOURCE}" ]]; then
    echo "Unable to locate LEASED_RESOURCE"
    exit 1
  fi
  export LEASED_RESOURCE
fi


# Print the given message prefixed with the current UTC time (RFC 3339, seconds
# precision) followed by "> ". Backslash escapes in the message are interpreted.
function log_msg() {
  local now
  now="$(date -u --rfc-3339=seconds)"
  echo -e "${now}> $*"
}

#
# Globals
#
# Working directories, all rooted at a per-cluster directory under /tmp.
EVENT_WORKDIR=/tmp/iam-events-"${CLUSTER_NAME}"
EVENTS_PATH_RAW=${EVENT_WORKDIR}/objects
EVENTS_PATH_PARSED=${EVENT_WORKDIR}/parsed
CREDS_REQ_PATH=${EVENT_WORKDIR}/credrequests
CREDS_REQ_PATH_RAW=${EVENT_WORKDIR}/credrequests-raw

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"

# Time window of interest: from the creation of the custom IAM user until now.
# Assigned first and exported afterwards, because `export VAR="$(cmd)"` returns
# the status of `export` and hides a failing command from errexit.
GATHER_EVENT_START_TIME="$(<"${SHARED_DIR}"/time_iam_created)"
GATHER_EVENT_END_TIME="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
export GATHER_EVENT_START_TIME GATHER_EVENT_END_TIME

ACCOUNT_ID="$(aws sts get-caller-identity | jq -r .Account)"

OBJECTS_PREFIX="AWSLogs/${ACCOUNT_ID}/CloudTrail/${LEASED_RESOURCE}"
# -u: the start time is recorded in UTC, so render the year/month in UTC as well
# and keep the prefix independent from the local time zone.
# NOTE(review): the prefix only covers the month of the start time; objects
# written after a month rollover would not match it - confirm if that matters.
OBJECTS_PREFIX_START="${OBJECTS_PREFIX}/$(date -u -d "${GATHER_EVENT_START_TIME}" +%Y/%m)"

#
# Init event discovery
#
# Polls the trail bucket for archives modified after GATHER_EVENT_START_TIME and
# downloads the ones not fetched yet. The loop stops when an event newer than
# GATHER_EVENT_END_TIME is found, or after RETRY_LIMIT attempts (in that case
# the script goes on with whatever was downloaded).
log_msg "Starting event gathering with timestamps: "
echo "start=[${GATHER_EVENT_START_TIME}] end=[${GATHER_EVENT_END_TIME}]"

mkdir -pv "${EVENTS_PATH_RAW}" || true
GATHER_THRESHOLD=0
RETRY_LIMIT=10
while true; do
  GATHER_THRESHOLD=$((GATHER_THRESHOLD+1))
  # -gt (not -ge): the counter is incremented before this check, so -ge would
  # give up after only RETRY_LIMIT-1 attempts.
  if [ "${GATHER_THRESHOLD}" -gt "${RETRY_LIMIT}" ]; then
    log_msg "ERROR Timeout waiting for newer events, trying the best effort parsing existing logs"
    break
  fi
  log_msg "Collecting objects with started since ${GATHER_EVENT_START_TIME}"
  aws s3api list-objects-v2 \
    --bucket "${AWS_TRAIL_BUCKET_NAME}" \
    --prefix "${OBJECTS_PREFIX_START}" \
    --query 'Contents[?LastModified > `'"${GATHER_EVENT_START_TIME}"'`]' \
    > "${EVENTS_PATH_RAW}"-metadata

  found=$(jq -r '.|length' "${EVENTS_PATH_RAW}"-metadata)
  if [[ ${found} -eq 0 ]]; then
    log_msg "Found 0 event, waiting 30s for next iteration [${GATHER_THRESHOLD}/${RETRY_LIMIT}]"
    sleep 30
    continue
  fi
  log_msg "Found [${found}] archive files with events"
  # The jq filter is quoted so the shell never glob-expands it.
  for obKey in $(jq -r '.[].Key' "${EVENTS_PATH_RAW}"-metadata); do
    objName=$(basename "${obKey}")
    # synchronize only if the event archive wasn't downloaded yet
    if [[ ! -f ${EVENTS_PATH_RAW}/${objName} ]]; then
      # The account ID is masked in the job log.
      echo "Downloading archive ${objName//${ACCOUNT_ID}/XXXXXXXXXXXX}"
      aws s3 cp s3://"${AWS_TRAIL_BUCKET_NAME}"/"${obKey}" "${EVENTS_PATH_RAW}"/ >/dev/null
    fi
  done

  # Decompress the archives once and derive both boundaries from the same
  # sorted list. eventTime values are assumed to be uniform ISO-8601 strings,
  # which order chronologically with a plain (non-numeric) sort.
  EVENT_TIMES="$(zcat "${EVENTS_PATH_RAW}"/* | jq -r '.Records[].eventTime' | sort || true)"

  log_msg "Checking timestamp of the first record"
  LOG_INITIAL_EVENT="$(head -n1 <<<"${EVENT_TIMES}")"

  log_msg "Checking timestamp of the last record"
  LOG_LATEST_EVENT="$(tail -n1 <<<"${EVENT_TIMES}")"

  log_msg "Found event timestamps: initial=[${LOG_INITIAL_EVENT}] final=[${LOG_LATEST_EVENT}]"

  # Check if the latest event is greater than desired stop time. An empty
  # timestamp (no record parsed) is treated as "not there yet".
  if [[ -n "${LOG_LATEST_EVENT}" ]] &&
     [ "$(date -d "${LOG_LATEST_EVENT}" +%s)" -gt "$(date -d "${GATHER_EVENT_END_TIME}" +%s)" ]; then
    log_msg "Found event with timestamp newer than desired: [${LOG_LATEST_EVENT}]"
    break
  fi

  log_msg "Latest[${LOG_LATEST_EVENT}] event is older than the desired[${GATHER_EVENT_END_TIME}]"
  log_msg "Pausing 30 seconds before checking latest events. [${GATHER_THRESHOLD}/${RETRY_LIMIT}]"
  sleep 30
done

#
# Download cci (cloud credentials insights)
#
# TODO(mtulio): define where to save that cross-component tool to parse IAM events.
# This script must not be saved in a component repo as it is intended to be used by
# CI.
# NOTE(review): the utility is fetched from a mutable branch and executed as-is;
# consider pinning it to a commit and verifying a checksum.
CCI=/tmp/cci
log_msg "Downloading cci (cloud credential insights) utility"
# -f makes curl fail on HTTP errors instead of saving the error page as the
# executable, -S keeps the error message visible despite -s, -L follows redirects.
curl -fsSL -o "${CCI}" \
  https://raw.githubusercontent.com/mtulio/mtulio.labs/refs/heads/exp-ocp-cred-informer/labs/ocp-identity/cloud-credentials-insights/cci.py
chmod +x "${CCI}"

#
# Extracting events
#
log_msg "\nChecking the size of discovered events / raw data"
du -sh "${EVENTS_PATH_RAW}"

log_msg "Extracting insights from events"

# The IAM user to look for is the first entry of the aws_user_names file.
INSTALLER_USER_NAME="$(head -n1 "${SHARED_DIR}/aws_user_names")"

mkdir -v "${EVENTS_PATH_PARSED}" || true

# Run the extract command over the downloaded archives, filtering by the
# cluster name prefix and by the installer user name.
extract_filters="principal-prefix=${CLUSTER_NAME},principal-name=${INSTALLER_USER_NAME}"
"${CCI}" \
  --command extract \
  --events-path "${EVENTS_PATH_RAW}" \
  --output "${EVENTS_PATH_PARSED}" \
  --filters "${extract_filters}"

#
# Extract credentials requests
#
log_msg "Attempting to extract credential requests from RELEASE_IMAGE_LATEST=${RELEASE_IMAGE_LATEST:-}"

mkdir -pv "${CREDS_REQ_PATH}" || true
mkdir -pv "${CREDS_REQ_PATH_RAW}" || true

# Best-effort extraction of the AWS CredentialsRequests manifests from the
# release image into CREDS_REQ_PATH.
# Returns 0 and appends --credentials-requests-path to CCI_EXTRA_ARGS only when
# every step succeeded; returns 1 otherwise, leaving CCI_EXTRA_ARGS untouched.
function extract_credrequests() {
  # Without a release image there is nothing to extract; checked explicitly
  # because expanding an unset variable aborts the script under nounset.
  if [[ -z "${RELEASE_IMAGE_LATEST:-}" ]]; then
    echo "RELEASE_IMAGE_LATEST is not set, skipping credentials requests extraction"
    return 1
  fi
  # The caller invokes this function with `||`, which disables errexit in
  # here, so the steps are chained explicitly. The subshell keeps the working
  # directory of the script unchanged whatever the outcome.
  (
    cd "${CREDS_REQ_PATH_RAW}" &&
    cp "${CLUSTER_PROFILE_DIR}"/pull-secret pull-secret &&
    oc registry login --to pull-secret &&
    oc adm release extract --registry-config pull-secret \
      --credentials-requests --cloud=aws \
      --to="${CREDS_REQ_PATH}" \
      --from="${RELEASE_IMAGE_LATEST}"
  ) || return 1
  CCI_EXTRA_ARGS+="--credentials-requests-path=${CREDS_REQ_PATH} "
}
extract_credrequests || log_msg "Unable to extract credential requests, continuing without them"

#
# Parse events considering requested permissions by CredentialsRequests manifests
#
log_msg "Creating report based in events and credential requests..."

# TODO: check if we can cover this file more generically.
INSTALLER_REQUEST_FILE="${SHARED_DIR}/4.18-mint-creds.json"
# When the installer requests file is absent, provide an empty JSON object.
[[ -f "${INSTALLER_REQUEST_FILE}" ]] || jq -n '{}' > "${INSTALLER_REQUEST_FILE}"

compare_args=(
  --command compare
  --events-path "${EVENTS_PATH_PARSED}"/events.json
  --output "${EVENTS_PATH_PARSED}"
  --installer-user-name "${INSTALLER_USER_NAME}"
  --installer-requests-file "${INSTALLER_REQUEST_FILE}"
  --filters cluster-name="${CLUSTER_NAME}"
)
# CCI_EXTRA_ARGS is a space-separated list of extra flags (possibly unset), so
# it is expanded unquoted on purpose.
# shellcheck disable=SC2206
compare_args+=(${CCI_EXTRA_ARGS-})
"${CCI}" "${compare_args[@]}"

log_msg "Copying results to artifacts directory"
cp -v "${EVENTS_PATH_PARSED}"/* "${ARTIFACT_DIR}"/
Loading

0 comments on commit f2789fa

Please sign in to comment.