-
Notifications
You must be signed in to change notification settings - Fork 77
137 lines (124 loc) · 4.05 KB
/
dvc-studio.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Workflow that provisions a cloud runner with CML and runs a DVC experiment
# on it, reporting to DVC Studio.
name: DVC Studio Experiment

on:
  # Run on every branch push; pushes of tags are ignored.
  push:
    tags-ignore:
      - '**'

  # Manual runs. Every input is optional; the jobs fall back to the same
  # defaults when an input is empty (e.g. on push events).
  workflow_dispatch:
    inputs:
      exp-run-args:
        description: 'Args to be passed to dvc exp run call'
        required: false
        type: string
        default: ''
      parent-sha:
        description: 'SHA of the commit to start the experiment from'
        required: false
        type: string
        default: ''
      cloud:
        description: 'Cloud compute provider to host the runner'
        required: false
        default: 'aws'
        type: choice
        options:
          - aws
          - azure
          - gcp
      # Machine type of the runner (the description links to the accepted
      # values). Explicitly typed as string for consistency with siblings.
      type:
        description: 'https://registry.terraform.io/providers/iterative/iterative/latest/docs/resources/task#machine-type'
        required: false
        default: 'g5.2xlarge'
        type: string
      # Cloud region of the runner (the description links to the accepted
      # values).
      region:
        description: 'https://registry.terraform.io/providers/iterative/iterative/latest/docs/resources/task#cloud-region'
        required: false
        default: 'us-east'
        type: string
      # Boolean input: the `inputs` context exposes this as a real boolean,
      # not the string 'true'/'false'.
      spot:
        description: 'Request a spot instance'
        required: false
        default: false
        type: boolean
      storage:
        description: 'Disk size in GB'
        required: false
        default: 40
        type: number
      timeout:
        description: 'Timeout in seconds'
        required: false
        default: 3600
        type: number

# Scopes granted to the workflow's GITHUB_TOKEN. The three scopes must be
# nested under `permissions`; at column 0 they would be unrelated top-level
# keys and `permissions` itself would be empty.
permissions:
  contents: write
  id-token: write
  pull-requests: write

jobs:
  # Launches a self-hosted runner labelled `cml` on the selected cloud.
  # Runs only for pushes made by the Studio bot or for manual dispatches.
  deploy-runner:
    if: ${{ (github.actor == 'iterative-studio[bot]') || (github.event_name == 'workflow_dispatch') }}
    environment: cloud
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Empty string when no parent SHA was supplied.
          ref: ${{ inputs.parent-sha || '' }}
      - uses: iterative/setup-cml@v2
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-east-2
          role-to-assume: ${{ vars.AWS_SANDBOX_ROLE }}
          # 43200 s = 12 h.
          role-duration-seconds: 43200
      - name: Create Runner
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
        # Each option falls back to the workflow_dispatch default when the
        # corresponding input is empty.
        # `spot` is declared as a boolean input, so it is compared against the
        # boolean `true`; comparing only against the string 'true' never
        # matches because mismatched types are coerced to numbers. The string
        # form is still accepted in case the value arrives as text.
        run: |
          cml runner launch --single \
            --labels=cml \
            --cloud=${{ inputs.cloud || 'aws' }} \
            --cloud-region=${{ inputs.region || 'us-east' }} \
            --cloud-hdd-size=${{ inputs.storage || '40' }} \
            --cloud-type=${{ inputs.type || 'g5.2xlarge' }} \
            --idle-timeout=${{ inputs.timeout || '3600' }} \
            ${{ ((inputs.spot == true || inputs.spot == 'true') && '--cloud-spot') || '' }}

  # Runs the experiment on the runner launched by `deploy-runner` (matched
  # through the `cml` label) inside the CML GPU container image.
  runner-job:
    needs: deploy-runner
    runs-on: [self-hosted, cml]
    environment: cloud
    container:
      image: iterativeai/cml:latest-gpu
      options: --gpus all --ipc host
    steps:
      - uses: actions/checkout@v3
        with:
          # Empty string when no parent SHA was supplied.
          ref: ${{ inputs.parent-sha || '' }}
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-east-2
          role-to-assume: ${{ vars.AWS_SANDBOX_ROLE }}
          # 43200 s = 12 h.
          role-duration-seconds: 43200
      - run: pip install -r requirements.txt
      - name: Train
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          DVC_STUDIO_TOKEN: ${{ secrets.DVC_STUDIO_TOKEN }}
          DVCLIVE_LOGLEVEL: DEBUG
        # NOTE(review): `exp-run-args` is pasted verbatim into the script text
        # before the shell runs, so whoever dispatches the workflow controls
        # those shell words. Kept as-is because the value is meant to expand
        # into several CLI arguments; consider validating it if dispatch
        # rights are ever widened.
        # The `push_remote` DVC remote registered on the last line is the one
        # the two sharing steps below push to.
        run: |
          cml ci --fetch-depth 0
          dvc exp run --pull --allow-missing ${{ github.event.inputs.exp-run-args }}
          dvc remote add --local push_remote s3://dvc-public/remote/get-started-pools
      # Manual runs: push the experiment to the `origin` Git remote, using
      # `push_remote` as the DVC remote.
      - name: Workflow Dispatch Sharing
        if: github.event_name == 'workflow_dispatch'
        env:
          DVC_STUDIO_TOKEN: ${{ secrets.DVC_STUDIO_TOKEN }}
        run: |
          dvc exp push origin -r push_remote
      # Studio-bot runs: push data, open a pull request, and comment on it
      # with a metrics/params diff against `main`.
      - name: Commit-based Sharing
        if: github.actor == 'iterative-studio[bot]'
        env:
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
        run: |
          dvc push -r push_remote
          cml pr --squash --skip-ci .
          echo "## Metrics" > report.md
          dvc metrics diff main --md >> report.md
          echo "## Params" >> report.md
          dvc params diff main --md >> report.md
          cml comment create --pr report.md