-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
232 lines (197 loc) · 6.61 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
// The test-splitter tool fetches and runs test plans generated by Buildkite
// Test Splitting.
package main
import (
"context"
"errors"
"flag"
"fmt"
"os"
"os/exec"
"os/signal"
"strconv"
"strings"
"time"
"github.com/buildkite/test-splitter/internal/api"
"github.com/buildkite/test-splitter/internal/config"
"github.com/buildkite/test-splitter/internal/plan"
"github.com/buildkite/test-splitter/internal/runner"
)
// Version is the tool's version string printed by the -version flag. It
// defaults to empty and is presumably injected at build time (e.g. via
// -ldflags "-X main.Version=...") — TODO confirm against the build script.
var Version = ""
// main wires the tool together: it loads configuration, resolves the list of
// test files, fetches (or falls back to creating) a test plan, runs the
// subset of tests assigned to this node, and retries failures when enabled.
//
// Exit codes: 16 signals a tool-level error; when the test runner itself
// fails and retries are disabled (or exhausted), the runner's own exit code
// is propagated instead.
func main() {
	// get config
	cfg, err := config.New()
	if err != nil {
		logErrorAndExit(16, "Invalid configuration: %v", err)
	}

	// TODO: detect test runner and use appropriate runner
	testRunner := runner.NewRspec(cfg.TestCommand)

	versionFlag := flag.Bool("version", false, "print version information")
	// Gathering files
	filesFlag := flag.String("files", "", "string of file names for splitting")
	flag.Parse()

	var files []string

	// -version short-circuits everything else.
	if *versionFlag {
		fmt.Println(Version)
		os.Exit(0)
	}

	// Files may be passed explicitly as a comma-separated list via -files;
	// otherwise the runner discovers them on its own.
	if *filesFlag != "" {
		files = strings.Split(*filesFlag, ",")
	} else {
		fs, err := testRunner.GetFiles()
		if err != nil {
			logErrorAndExit(16, "Couldn't get files: %v", err)
		}
		files = fs
	}

	// get plan
	ctx := context.Background()
	// We expect the whole test plan fetching process takes no more than 60 seconds.
	// Configure the timeout as 70s to give it a bit more buffer.
	fetchCtx, cancel := context.WithTimeout(ctx, 70*time.Second)
	defer cancel()
	testPlan, err := fetchOrCreateTestPlan(fetchCtx, cfg, files)
	if err != nil {
		logErrorAndExit(16, "Couldn't create test plan: %v", err)
	}

	// get plan for this node
	// NOTE(review): if the plan has no task keyed by this node's index, the
	// zero-value task is used and this node runs an empty test list — confirm
	// that is intended.
	thisNodeTask := testPlan.Tasks[strconv.Itoa(cfg.NodeIndex)]

	// execute tests
	runnableTests := []string{}
	for _, testCase := range thisNodeTask.Tests {
		runnableTests = append(runnableTests, testCase.Path)
	}
	cmd, err := testRunner.Command(runnableTests)
	if err != nil {
		logErrorAndExit(16, "Couldn't process test command: %q, %v", testRunner.TestCommand, err)
	}
	if err := cmd.Start(); err != nil {
		logErrorAndExit(16, "Couldn't start tests: %v", err)
	}

	// Create a channel that will be closed when the command finishes.
	finishCh := make(chan struct{})

	// Start a goroutine that waits for a signal or the command to finish.
	go func() {
		// Create another channel to receive the signals.
		sigCh := make(chan os.Signal, 1)
		// No specific signals are listed, so Notify relays every incoming
		// signal to sigCh.
		signal.Notify(sigCh)
		// Wait for a signal to be received or the command to finish.
		// Because a message can come through both channels asynchronously,
		// we use for loop to listen to both channels and select the one that has a message.
		// Without for loop, only one case would be selected and the other would be ignored.
		// If the signal is received first, the finishCh will never get processed and the goroutine will run forever.
		for {
			select {
			case sig := <-sigCh:
				// When a signal is received, forward it to the command.
				cmd.Process.Signal(sig)
			case <-finishCh:
				// When the command finishes, we stop listening for signals and return.
				signal.Stop(sigCh)
				return
			}
		}
	}()

	if err := cmd.Wait(); err != nil {
		// An *exec.ExitError means the runner started but exited non-zero,
		// i.e. some tests failed.
		if exitError := new(exec.ExitError); errors.As(err, &exitError) {
			exitCode := exitError.ExitCode()
			if cfg.MaxRetries == 0 {
				// If retry is disabled, we exit immediately with the same exit code from the test runner
				logErrorAndExit(exitCode, "Rspec exited with error %v", err)
			} else {
				retryExitCode := retryFailedTests(testRunner, cfg.MaxRetries)
				if retryExitCode == 0 {
					os.Exit(0)
				} else {
					logErrorAndExit(retryExitCode, "Rspec exited with error %v after retry failing tests", err)
				}
			}
		}
		// Any other Wait failure (the process could not be run or reaped) is
		// a tool-level error.
		logErrorAndExit(16, "Couldn't run tests: %v", err)
	}

	// Close the channel that will stop the goroutine.
	close(finishCh)
}
// retryFailedTests re-runs the failing tests up to maxRetries times.
// It returns 0 as soon as a retry run passes, the test runner's exit code
// when the final attempt still fails with an *exec.ExitError, and 1 for any
// other failure mode once retries are exhausted.
func retryFailedTests(testRunner runner.Rspec, maxRetries int) int {
	for attempt := 1; attempt <= maxRetries; attempt++ {
		fmt.Printf("Attempt %d of %d to retry failing tests\n", attempt, maxRetries)

		cmd, err := testRunner.RetryCommand()
		if err != nil {
			logErrorAndExit(16, "Couldn't process retry command: %v", err)
		}
		if err := cmd.Start(); err != nil {
			logErrorAndExit(16, "Couldn't start tests: %v", err)
		}

		waitErr := cmd.Wait()
		if waitErr == nil {
			// The failing tests passed on this retry; report success.
			return 0
		}

		// On the last attempt, surface the runner's own exit code if the
		// failure carries one; otherwise keep looping / fall through.
		exitError := new(exec.ExitError)
		if errors.As(waitErr, &exitError) && attempt >= maxRetries {
			return exitError.ExitCode()
		}
	}
	return 1
}
// logErrorAndExit logs an error message and exits with the given exit code.
//
// The message is written to stderr (fix: the previous fmt.Printf sent errors
// to stdout, mixing them into the test runner's normal output and hiding
// them from tooling that reads stderr).
func logErrorAndExit(exitCode int, format string, v ...any) {
	fmt.Fprintf(os.Stderr, format+"\n", v...)
	os.Exit(exitCode)
}
// fetchOrCreateTestPlan fetches a test plan from the server, or creates a
// fallback plan if the server is unavailable or returns an error plan.
func fetchOrCreateTestPlan(ctx context.Context, cfg config.Config, files []string) (plan.TestPlan, error) {
	apiClient := api.NewClient(api.ClientConfig{
		ServerBaseUrl:    cfg.ServerBaseUrl,
		AccessToken:      cfg.AccessToken,
		OrganizationSlug: cfg.OrganizationSlug,
	})

	// A previously computed plan may already be cached server-side; prefer it.
	cachedPlan, err := apiClient.FetchTestPlan(cfg.SuiteSlug, cfg.Identifier)
	if err != nil {
		return plan.TestPlan{}, err
	}
	if cachedPlan != nil {
		return *cachedPlan, nil
	}

	// Cache miss: ask the server to split the given files into a new plan.
	testCases := make([]plan.TestCase, 0, len(files))
	for _, file := range files {
		testCases = append(testCases, plan.TestCase{Path: file})
	}

	testPlan, err := apiClient.CreateTestPlan(ctx, cfg.SuiteSlug, api.TestPlanParams{
		Mode:        cfg.Mode,
		Identifier:  cfg.Identifier,
		Parallelism: cfg.Parallelism,
		Tests: api.TestPlanParamsTest{
			Files: testCases,
		},
	})
	switch {
	case err == nil:
		// Plan created; still subject to the empty-task check below.
	case errors.Is(err, context.DeadlineExceeded):
		// The server took too long — degrade gracefully to a local plan.
		fmt.Println("Could not fetch plan from server, using fallback mode. Your build may take longer than usual.")
		testPlan = plan.CreateFallbackPlan(testCases, cfg.Parallelism)
	default:
		// Any other error means plan creation failed outright; abort.
		return plan.TestPlan{}, err
	}

	// The server can return an "error" plan indicated by an empty task list (i.e. `{"tasks": {}}`).
	// In this case, we should create a fallback plan.
	if len(testPlan.Tasks) == 0 {
		fmt.Println("Test splitter server returned an error, using fallback mode. Your build may take longer than usual.")
		testPlan = plan.CreateFallbackPlan(testCases, cfg.Parallelism)
	}

	return testPlan, nil
}