Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

chore: log provider stack traces on text file busy#15249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
stirby merged 1 commit intorelease/2.15fromspike-215-textfile
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletionprovisioner/terraform/executor.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -200,6 +200,15 @@ func versionFromBinaryPath(ctx context.Context, binaryPath string) (*version.Ver
return version.NewVersion(vj.Version)
}

type textFileBusyError struct {
exitErr *exec.ExitError
stderr string
}

func (e *textFileBusyError) Error() string {
return "text file busy: " + e.exitErr.String()
}

func (e *executor) init(ctx, killCtx context.Context, logr logSink) error {
ctx, span := e.server.startTrace(ctx, tracing.FuncName())
defer span.End()
Expand All@@ -216,13 +225,24 @@ func (e *executor) init(ctx, killCtx context.Context, logr logSink) error {
<-doneErr
}()

// As a special case, we want to look for the error "text file busy" in the stderr output of
// the init command, so we also take a copy of the stderr into an in memory buffer.
errBuf := newBufferedWriteCloser(errWriter)

args := []string{
"init",
"-no-color",
"-input=false",
}

return e.execWriteOutput(ctx, killCtx, args, e.basicEnv(), outWriter, errWriter)
err := e.execWriteOutput(ctx, killCtx, args, e.basicEnv(), outWriter, errBuf)
var exitErr *exec.ExitError
if xerrors.As(err, &exitErr) {
if bytes.Contains(errBuf.b.Bytes(), []byte("text file busy")) {
return &textFileBusyError{exitErr: exitErr, stderr: errBuf.b.String()}
}
}
return err
}

func getPlanFilePath(workdir string) string {
Expand DownExpand Up@@ -707,3 +727,26 @@ func (sw syncWriter) Write(p []byte) (n int, err error) {
defer sw.mut.Unlock()
return sw.w.Write(p)
}

type bufferedWriteCloser struct {
wc io.WriteCloser
b bytes.Buffer
}

func newBufferedWriteCloser(wc io.WriteCloser) *bufferedWriteCloser {
return &bufferedWriteCloser{
wc: wc,
}
}

func (b *bufferedWriteCloser) Write(p []byte) (int, error) {
n, err := b.b.Write(p)
if err != nil {
return n, err
}
return b.wc.Write(p)
}

func (b *bufferedWriteCloser) Close() error {
return b.wc.Close()
}
62 changes: 62 additions & 0 deletionsprovisioner/terraform/provision.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4,7 +4,11 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"strings"
"time"

Expand DownExpand Up@@ -109,10 +113,32 @@ func (s *server) Plan(
initTimings.ingest(createInitTimingsEvent(timingInitStart))

err = e.init(ctx, killCtx, sess)

if err != nil {
initTimings.ingest(createInitTimingsEvent(timingInitErrored))

s.logger.Debug(ctx, "init failed", slog.Error(err))

// Special handling for "text file busy" c.f. https://github.com/coder/coder/issues/14726
// We believe this might be due to some race condition that prevents the
// terraform-provider-coder process from exiting. When terraform tries to install the
// provider during this init, it copies over the local cache. Normally this isn't an issue,
// but if the terraform-provider-coder process is still running from a previous build, Linux
// returns "text file busy" error when attempting to open the file.
//
// Capturing the stack trace from the process should help us figure out why it has not
// exited. We'll drop these diagnostics in a CRITICAL log so that operators are likely to
// notice, and also because it indicates this provisioner could be permanently broken and
// require a restart.
var errTFB *textFileBusyError
if xerrors.As(err, &errTFB) {
stacktrace := tryGettingCoderProviderStacktrace(sess)
s.logger.Critical(ctx, "init: text file busy",
slog.Error(errTFB),
slog.F("stderr", errTFB.stderr),
slog.F("provider_coder_stacktrace", stacktrace),
)
}
return provisionersdk.PlanErrorf("initialize terraform: %s", err)
}

Expand DownExpand Up@@ -280,3 +306,39 @@ func logTerraformEnvVars(sink logSink) {
}
}
}

// tryGettingCoderProviderStacktrace attempts to dial a special pprof endpoint we added to
// terraform-provider-coder in https://github.com/coder/terraform-provider-coder/pull/295 which
// shipped in v1.0.4. It will return the stacktraces of the provider, which will hopefully allow us
// to figure out why it hasn't exited.
func tryGettingCoderProviderStacktrace(sess *provisionersdk.Session) string {
path := filepath.Clean(filepath.Join(sess.WorkDirectory, "../.coder/pprof"))
sess.Logger.Info(sess.Context(), "attempting to get stack traces", slog.F("path", path))
c := http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
d := net.Dialer{}
return d.DialContext(ctx, "unix", path)
},
},
}
req, err := http.NewRequestWithContext(sess.Context(), http.MethodGet,
"http://localhost/debug/pprof/goroutine?debug=2", nil)
if err != nil {
sess.Logger.Error(sess.Context(), "error creating GET request", slog.Error(err))
return ""
}
resp, err := c.Do(req)
if err != nil {
// Only log at Info here, since we only added the pprof endpoint to terraform-provider-coder
// in v1.0.4
sess.Logger.Info(sess.Context(), "could not GET stack traces", slog.Error(err))
return ""
}
defer resp.Body.Close()
stacktraces, err := io.ReadAll(resp.Body)
if err != nil {
sess.Logger.Error(sess.Context(), "could not read stack traces", slog.Error(err))
}
return string(stacktraces)
}
93 changes: 89 additions & 4 deletionsprovisioner/terraform/provision_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -9,6 +9,8 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"net/http"
"os"
"path/filepath"
"runtime"
Expand All@@ -31,14 +33,22 @@ import (
type provisionerServeOptions struct {
binaryPath string
exitTimeout time.Duration
workDir string
logger *slog.Logger
}

func setupProvisioner(t *testing.T, opts *provisionerServeOptions) (context.Context, proto.DRPCProvisionerClient) {
if opts == nil {
opts = &provisionerServeOptions{}
}
cachePath := t.TempDir()
workDir := t.TempDir()
if opts.workDir == "" {
opts.workDir = t.TempDir()
}
if opts.logger == nil {
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
opts.logger = &logger
}
client, server := drpc.MemTransportPipe()
ctx, cancelFunc := context.WithCancel(context.Background())
serverErr := make(chan error, 1)
Expand All@@ -55,8 +65,8 @@ func setupProvisioner(t *testing.T, opts *provisionerServeOptions) (context.Cont
serverErr <- terraform.Serve(ctx, &terraform.ServeOptions{
ServeOptions: &provisionersdk.ServeOptions{
Listener: server,
Logger:slogtest.Make(t, nil).Leveled(slog.LevelDebug),
WorkDirectory: workDir,
Logger:*opts.logger,
WorkDirectory:opts.workDir,
},
BinaryPath: opts.binaryPath,
CachePath: cachePath,
Expand DownExpand Up@@ -236,7 +246,7 @@ func TestProvision_CancelTimeout(t *testing.T) {
dir := t.TempDir()
binPath := filepath.Join(dir, "terraform")

// Example: exec /path/to/terrafork_fake_cancel.sh 1.2.1 apply "$@"
// Example: exec /path/to/terraform_fake_cancel.sh 1.2.1 apply "$@"
content := fmt.Sprintf("#!/bin/sh\nexec %q %s \"$@\"\n", fakeBin, terraform.TerraformVersion.String())
err = os.WriteFile(binPath, []byte(content), 0o755) //#nosec
require.NoError(t, err)
Expand DownExpand Up@@ -282,6 +292,81 @@ func TestProvision_CancelTimeout(t *testing.T) {
}
}

// below we exec fake_text_file_busy.sh, which causes the kernel to execute it, and if more than
// one process tries to do this, it can cause "text file busy" to be returned to us. In this test
// we want to simulate "text file busy" getting logged by terraform, due to an issue with the
// terraform-provider-coder
// nolint: paralleltest
func TestProvision_TextFileBusy(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("This test uses unix sockets and is not supported on Windows")
}

cwd, err := os.Getwd()
require.NoError(t, err)
fakeBin := filepath.Join(cwd, "testdata", "fake_text_file_busy.sh")

dir := t.TempDir()
binPath := filepath.Join(dir, "terraform")

// Example: exec /path/to/terraform_fake_cancel.sh 1.2.1 apply "$@"
content := fmt.Sprintf("#!/bin/sh\nexec %q %s \"$@\"\n", fakeBin, terraform.TerraformVersion.String())
err = os.WriteFile(binPath, []byte(content), 0o755) //#nosec
require.NoError(t, err)

workDir := t.TempDir()

err = os.Mkdir(filepath.Join(workDir, ".coder"), 0o700)
require.NoError(t, err)
l, err := net.Listen("unix", filepath.Join(workDir, ".coder", "pprof"))
require.NoError(t, err)
defer l.Close()
handlerCalled := 0
// nolint: gosec
srv := &http.Server{
Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Equal(t, "/debug/pprof/goroutine", r.URL.Path)
w.WriteHeader(http.StatusOK)
_, err := w.Write([]byte("thestacks\n"))
assert.NoError(t, err)
handlerCalled++
}),
}
srvErr := make(chan error, 1)
go func() {
srvErr <- srv.Serve(l)
}()

logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
ctx, api := setupProvisioner(t, &provisionerServeOptions{
binaryPath: binPath,
exitTimeout: time.Second,
workDir: workDir,
logger: &logger,
})

sess := configure(ctx, t, api, &proto.Config{
TemplateSourceArchive: makeTar(t, nil),
})

err = sendPlan(sess, proto.WorkspaceTransition_START)
require.NoError(t, err)

found := false
for {
msg, err := sess.Recv()
require.NoError(t, err)

if c := msg.GetPlan(); c != nil {
require.Contains(t, c.Error, "exit status 1")
found = true
break
}
}
require.True(t, found)
require.EqualValues(t, 1, handlerCalled)
}

func TestProvision(t *testing.T) {
t.Parallel()

Expand Down
40 changes: 40 additions & 0 deletionsprovisioner/terraform/testdata/fake_text_file_busy.sh
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
#!/bin/sh

VERSION=$1
shift 1

json_print() {
echo "{\"@level\":\"error\",\"@message\":\"$*\"}"
}

case "$1" in
version)
cat <<-EOF
{
"terraform_version": "${VERSION}",
"platform": "linux_amd64",
"provider_selections": {},
"terraform_outdated": false
}
EOF
exit 0
;;
init)
echo "init"
echo >&2 "Error: Failed to install provider"
echo >&2 " Error while installing coder/coder v1.0.4: open"
echo >&2 " /home/coder/.cache/coder/provisioner-0/tf/registry.terraform.io/coder/coder/1.0.3/linux_amd64/terraform-provider-coder_v1.0.4:"
echo >&2 " text file busy"
exit 1
;;
plan)
echo "plan not supported"
exit 1
;;
apply)
echo "apply not supported"
exit 1
;;
esac

exit 10
Loading

[8]ページ先頭

©2009-2025 Movatter.jp