Thanks @benbjohnson2 for the quick response!
Unfortunately the logs are truncated and can’t go far enough back to see before the disconnection. One thing I did notice is I have a bug in my app that causes it to occasionally attempt to write to the local database on the replica which throws a (sqlite3.OperationalError) attempt to write a readonly database
from the driver. Presumably that doesn’t effect replication though.
I was only able to get the output of /debug/pprof/goroutine
on the replica:
Sydney (replica):
goroutine 34116 [running]:
runtime/pprof.writeGoroutineStacks({0xb66740, 0xc0001840e0})
/usr/local/go/src/runtime/pprof/pprof.go:692 +0x70
runtime/pprof.writeGoroutine({0xb66740?, 0xc0001840e0?}, 0x0?)
/usr/local/go/src/runtime/pprof/pprof.go:681 +0x2b
runtime/pprof.(*Profile).WriteTo(0xa2abc0?, {0xb66740?, 0xc0001840e0?}, 0xc?)
/usr/local/go/src/runtime/pprof/pprof.go:330 +0x14b
net/http/pprof.handler.ServeHTTP({0xc00002e911, 0x9}, {0xb6a690, 0xc0001840e0}, 0xc00009806f?)
/usr/local/go/src/net/http/pprof/pprof.go:253 +0x4a5
net/http/pprof.Index({0xb6a690?, 0xc0001840e0}, 0xc000130600)
/usr/local/go/src/net/http/pprof/pprof.go:371 +0x13e
github.com/superfly/litefs/http.(*Server).serveHTTP(0xc000162e60?, {0xb6a690, 0xc0001840e0}, 0xc000130600)
/src/litefs/http/server.go:128 +0x186
net/http.HandlerFunc.ServeHTTP(0xc0002009b8?, {0xb6a690?, 0xc0001840e0?}, 0xc000200990?)
/usr/local/go/src/net/http/server.go:2109 +0x2f
golang.org/x/net/http2/h2c.h2cHandler.ServeHTTP({{0xb67280?, 0xc0001509a0?}, 0xc00008b980?}, {0xb6a690, 0xc0001840e0}, 0xc000130600)
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/h2c/h2c.go:113 +0x49f
net/http.serverHandler.ServeHTTP({0xc000492600?}, {0xb6a690, 0xc0001840e0}, 0xc000130600)
/usr/local/go/src/net/http/server.go:2947 +0x30c
net/http.(*conn).serve(0xc0001628c0, {0xb6ae18, 0xc0001d1290})
/usr/local/go/src/net/http/server.go:1991 +0x607
created by net/http.(*Server).Serve
/usr/local/go/src/net/http/server.go:3102 +0x4db
goroutine 1 [select, 1336 minutes]:
main.main()
/src/litefs/cmd/litefs/main.go:79 +0x557
goroutine 23 [syscall, 1336 minutes]:
os/signal.signal_recv()
/usr/local/go/src/runtime/sigqueue.go:152 +0x2f
os/signal.loop()
/usr/local/go/src/os/signal/signal_unix.go:23 +0x19
created by os/signal.Notify.func1.1
/usr/local/go/src/os/signal/signal.go:151 +0x2a
goroutine 38 [sleep]:
time.Sleep(0x3b9aca00)
/usr/local/go/src/runtime/time.go:195 +0x135
github.com/superfly/litefs.(*Store).monitorLease(0xc000130500, {0xb6ad70, 0xc00008b8c0})
/src/litefs/store.go:462 +0x2c8
github.com/superfly/litefs.(*Store).Open.func1()
/src/litefs/store.go:134 +0x25
golang.org/x/sync/errgroup.(*Group).Go.func1()
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:75 +0x64
created by golang.org/x/sync/errgroup.(*Group).Go
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:72 +0xa5
goroutine 39 [select, 1 minutes]:
github.com/superfly/litefs.(*Store).monitorRetention(0xc000130500, {0xb6ad70, 0xc00008b8c0})
/src/litefs/store.go:607 +0xe7
github.com/superfly/litefs.(*Store).Open.func2()
/src/litefs/store.go:138 +0x25
golang.org/x/sync/errgroup.(*Group).Go.func1()
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:75 +0x64
created by golang.org/x/sync/errgroup.(*Group).Go
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:72 +0xa5
goroutine 34117 [runnable]:
net/http.(*connReader).startBackgroundRead.func2()
/usr/local/go/src/net/http/server.go:674
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1594 +0x1
created by net/http.(*connReader).startBackgroundRead
/usr/local/go/src/net/http/server.go:674 +0xca
goroutine 44 [syscall, 21 minutes]:
syscall.Syscall(0x4780ee?, 0xf03940?, 0x40dc07?, 0x9f5880?)
/usr/local/go/src/syscall/syscall_linux.go:68 +0x27
syscall.read(0xf03940?, {0xc000494000?, 0x11?, 0x73586c6f67666e00?})
/usr/local/go/src/syscall/zsyscall_linux_amd64.go:696 +0x45
syscall.Read(...)
/usr/local/go/src/syscall/syscall_unix.go:183
bazil.org/fuse.(*Conn).ReadRequest(0xc0001f2840)
/go/pkg/mod/bazil.org/fuse@v0.0.0-20200524192727-fb710f7dfd05/fuse.go:580 +0xc6
bazil.org/fuse/fs.(*Server).Serve(0xc000185dc0, {0xb65d80?, 0xc0001f25a0})
/go/pkg/mod/bazil.org/fuse@v0.0.0-20200524192727-fb710f7dfd05/fs/serve.go:501 +0x3ef
github.com/superfly/litefs/fuse.(*FileSystem).Mount.func2()
/src/litefs/fuse/file_system.go:93 +0x28
created by github.com/superfly/litefs/fuse.(*FileSystem).Mount
/src/litefs/fuse/file_system.go:92 +0x31c
goroutine 45 [IO wait]:
internal/poll.runtime_pollWait(0x7efbed903038, 0x72)
/usr/local/go/src/runtime/netpoll.go:305 +0x89
internal/poll.(*pollDesc).wait(0xc00016a180?, 0x6?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x32
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Accept(0xc00016a180)
/usr/local/go/src/internal/poll/fd_unix.go:614 +0x234
net.(*netFD).accept(0xc00016a180)
/usr/local/go/src/net/fd_unix.go:172 +0x35
net.(*TCPListener).accept(0xc000011608)
/usr/local/go/src/net/tcpsock_posix.go:142 +0x28
net.(*TCPListener).Accept(0xc000011608)
/usr/local/go/src/net/tcpsock.go:288 +0x3d
net/http.(*Server).Serve(0xc0000fe1e0, {0xb6a480, 0xc000011608})
/usr/local/go/src/net/http/server.go:3070 +0x385
github.com/superfly/litefs/http.(*Server).Serve.func1()
/src/litefs/http/server.go:74 +0x2d
golang.org/x/sync/errgroup.(*Group).Go.func1()
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:75 +0x64
created by golang.org/x/sync/errgroup.(*Group).Go
/go/pkg/mod/golang.org/x/sync@v0.0.0-20220601150217-0de741cfad7f/errgroup/errgroup.go:72 +0xa5
goroutine 50 [IO wait]:
internal/poll.runtime_pollWait(0x7efbed902c78, 0x72)
/usr/local/go/src/runtime/netpoll.go:305 +0x89
internal/poll.(*pollDesc).wait(0xc00016b080?, 0xc00024b000?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x32
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc00016b080, {0xc00024b000, 0x1000, 0x1000})
/usr/local/go/src/internal/poll/fd_unix.go:167 +0x25a
net.(*netFD).Read(0xc00016b080, {0xc00024b000?, 0x0?, 0x400?})
/usr/local/go/src/net/fd_posix.go:55 +0x29
net.(*conn).Read(0xc0000123b0, {0xc00024b000?, 0x1000000406730?, 0x4ed000801?})
/usr/local/go/src/net/net.go:183 +0x45
bufio.(*Reader).Read(0xc0001f2ea0, {0xc000185ee0, 0x9, 0x8c6fa5?})
/usr/local/go/src/bufio/bufio.go:237 +0x1bb
io.ReadAtLeast({0xb65920, 0xc0001f2ea0}, {0xc000185ee0, 0x9, 0x9}, 0x9)
/usr/local/go/src/io/io.go:332 +0x9a
io.ReadFull(...)
/usr/local/go/src/io/io.go:351
golang.org/x/net/http2.readFrameHeader({0xc000185ee0?, 0x9?, 0x0?}, {0xb65920?, 0xc0001f2ea0?})
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/frame.go:237 +0x6e
golang.org/x/net/http2.(*Framer).ReadFrame(0xc000185ea0)
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/frame.go:498 +0x95
golang.org/x/net/http2.(*clientConnReadLoop).run(0xc0001bdf98)
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/transport.go:2134 +0x130
golang.org/x/net/http2.(*ClientConn).readLoop(0xc000025080)
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/transport.go:2030 +0x6f
created by golang.org/x/net/http2.(*Transport).newClientConn
/go/pkg/mod/golang.org/x/net@v0.0.0-20220909164309-bea034e7d591/http2/transport.go:737 +0xaaa
goroutine 53 [syscall, 1336 minutes]:
syscall.Syscall6(0x55666d4937632b34?, 0x704b45626271542b?, 0x647355635a497271?, 0x64464a7536304d36?, 0x714678544e4a7268?, 0x3166557a46705937?, 0x5a73426752374f47?)
/usr/local/go/src/syscall/syscall_linux.go:90 +0x36
os.(*Process).blockUntilWaitable(0xc000290450)
/usr/local/go/src/os/wait_waitid.go:32 +0x87
os.(*Process).wait(0xc000290450)
/usr/local/go/src/os/exec_unix.go:22 +0x28
os.(*Process).Wait(...)
/usr/local/go/src/os/exec.go:132
os/exec.(*Cmd).Wait(0xc000195b80)
/usr/local/go/src/os/exec/exec.go:599 +0x4b
main.(*Main).execCmd.func1()
/src/litefs/cmd/litefs/main.go:402 +0x25
created by main.(*Main).execCmd
/src/litefs/cmd/litefs/main.go:402 +0x311
goroutine 52 [select, 1336 minutes]:
os/exec.(*Cmd).watchCtx.func1()
/usr/local/go/src/os/exec/exec.go:648 +0x96
created by os/exec.(*Cmd).watchCtx
/usr/local/go/src/os/exec/exec.go:647 +0x8e
goroutine 26564 [IO wait]:
internal/poll.runtime_pollWait(0x7efbed902f48, 0x72)
/usr/local/go/src/runtime/netpoll.go:305 +0x89
internal/poll.(*pollDesc).wait(0xc00016aa80?, 0xc0002ae000?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x32
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc00016aa80, {0xc0002ae000, 0x1300, 0x1300})
/usr/local/go/src/internal/poll/fd_unix.go:167 +0x25a
net.(*netFD).Read(0xc00016aa80, {0xc0002ae000?, 0xc000068560?, 0xc0002ae1b9?})
/usr/local/go/src/net/fd_posix.go:55 +0x29
net.(*conn).Read(0xc0000121b8, {0xc0002ae000?, 0x1299?, 0xc000068560?})
/usr/local/go/src/net/net.go:183 +0x45
crypto/tls.(*atLeastReader).Read(0xc0001fa090, {0xc0002ae000?, 0x0?, 0xc000202860?})
/usr/local/go/src/crypto/tls/conn.go:787 +0x3d
bytes.(*Buffer).ReadFrom(0xc0000a05f8, {0xb65aa0, 0xc0001fa090})
/usr/local/go/src/bytes/buffer.go:202 +0x98
crypto/tls.(*Conn).readFromUntil(0xc0000a0380, {0xb66480?, 0xc0000121b8}, 0x114c?)
/usr/local/go/src/crypto/tls/conn.go:809 +0xe5
crypto/tls.(*Conn).readRecordOrCCS(0xc0000a0380, 0x0)
/usr/local/go/src/crypto/tls/conn.go:616 +0x116
crypto/tls.(*Conn).readRecord(...)
/usr/local/go/src/crypto/tls/conn.go:582
crypto/tls.(*Conn).Read(0xc0000a0380, {0xc0002c8000, 0x1000, 0x6a0b60?})
/usr/local/go/src/crypto/tls/conn.go:1287 +0x16f
bufio.(*Reader).Read(0xc0002c7080, {0xc0001842e0, 0x9, 0x6bda65?})
/usr/local/go/src/bufio/bufio.go:237 +0x1bb
io.ReadAtLeast({0xb65920, 0xc0002c7080}, {0xc0001842e0, 0x9, 0x9}, 0x9)
/usr/local/go/src/io/io.go:332 +0x9a
io.ReadFull(...)
/usr/local/go/src/io/io.go:351
net/http.http2readFrameHeader({0xc0001842e0?, 0x9?, 0xc000492300?}, {0xb65920?, 0xc0002c7080?})
/usr/local/go/src/net/http/h2_bundle.go:1565 +0x6e
net/http.(*http2Framer).ReadFrame(0xc0001842a0)
/usr/local/go/src/net/http/h2_bundle.go:1829 +0x95
net/http.(*http2clientConnReadLoop).run(0xc000202f98)
/usr/local/go/src/net/http/h2_bundle.go:8875 +0x130
net/http.(*http2ClientConn).readLoop(0xc000024900)
/usr/local/go/src/net/http/h2_bundle.go:8771 +0x6f
created by net/http.(*http2Transport).newClientConn
/usr/local/go/src/net/http/h2_bundle.go:7478 +0xaaa
On the primary, the connection is refused, even though the instances seem to be running and passing health checks:
App
Name = cool-stuff-api
Owner = personal
Version = 10
Status = running
Hostname = cool-stuff-api.fly.dev
Platform = nomad
Instances
ID PROCESS VERSION REGION DESIRED STATUS HEALTH CHECKS RESTARTS CREATED
d968ff0c app 10 yyz run running 1 total, 1 passing 0 22h20m ago
7831561d app 10 syd run running 1 total, 1 passing 0 22h20m ago
On the primary instance, the litefs
process is still alive:
# ps aux | grep litefs
root 528 0.0 3.6 724280 8224 ? Sl Nov09 0:14 litefs -- /code/entrypoint.sh uvicorn coolstuff.shell.api:app --port 80 --host 0.0.0.0