Hello everyone! I’m currently working on the Challenge #2: Unique ID Generation problem, and I have a question.
I’ve implemented my solution using a single leader node. All other nodes request their unique IDs through this leader. However, when I run the following command:
css
maelstrom test -w unique-ids --bin maelstrom --time-limit 30 \
--rate 1000 --node-count 3 --availability total --nemesis partition
node n1 frequently times out, causing the test to fail. Sometimes it passes, but other times it fails intermittently. If you have any suggestions on what I might be doing wrong or how to improve my approach, I would really appreciate your help.
Below are excerpts of my code and some logs. Thank you in advance!
package main
import (
"context"
"encoding/json"
"sync/atomic"
maelstrom "github.com/jepsen-io/maelstrom/demo/go"
)
var ops atomic.Uint64
// maelstrom1/maelstrom test -w unique-ids --bin maelstrom --time-limit 30 --rate 1000 --node-count 3 --availability total --nemesis partition
func MaelstromGenerate(n *maelstrom.Node) {
n.Handle("generate", func(msg maelstrom.Message) error {
var body map[string]any
if err := json.Unmarshal(msg.Body, &body); err != nil {
return err
}
cur := uint64(0)
for cur == 0 {
cur = getCounter(n)
}
body["type"] = "generate_ok"
body["id"] = cur
return n.Reply(msg, body)
})
n.Handle("get_counter", func(msg maelstrom.Message) error {
body := map[string]any{
"id": getCounter(n),
}
n.Reply(msg, body)
return nil
})
}
func getCounter(n *maelstrom.Node) uint64 {
if n.ID() == "n0" {
return ops.Add(1)
}
resp, err := n.SyncRPC(context.Background(), "n0", map[string]any{"type": "get_counter"})
if err != nil {
return 0
}
var body map[string]any
if err := json.Unmarshal(resp.Body, &body); err != nil {
return 0
}
return uint64(body["id"].(float64))
}
# n1.log
2025/02/14 20:29:01 Received {c1 n1 {"type":"init","node_id":"n1","node_ids":["n0","n1","n2"],"msg_id":1}}
2025/02/14 20:29:01 Node n1 initialized
2025/02/14 20:29:01 Sent {"src":"n1","dest":"c1","body":{"in_reply_to":1,"type":"init_ok"}}
2025/02/14 20:29:01 Received {c6 n1 {"type":"generate","msg_id":1}}
2025/02/14 20:29:01 Sent {"src":"n1","dest":"n0","body":{"msg_id":1,"type":"get_counter"}}
2025/02/14 20:29:06 Received {c6 n1 {"type":"generate","msg_id":2}}
2025/02/14 20:29:06 Sent {"src":"n1","dest":"n0","body":{"msg_id":2,"type":"get_counter"}}
2025/02/14 20:29:11 Received {c6 n1 {"type":"generate","msg_id":3}}
2025/02/14 20:29:11 Sent {"src":"n1","dest":"n0","body":{"msg_id":3,"type":"get_counter"}}
2025/02/14 20:29:16 Received {c6 n1 {"type":"generate","msg_id":4}}
2025/02/14 20:29:16 Sent {"src":"n1","dest":"n0","body":{"msg_id":4,"type":"get_counter"}}
2025/02/14 20:29:21 Received {c6 n1 {"type":"generate","msg_id":5}} // may be timeout here!!
2025/02/14 20:29:21 Sent {"src":"n1","dest":"n0","body":{"msg_id":5,"type":"get_counter"}}
2025/02/14 20:29:21 Received {n0 n1 {"id":16492,"in_reply_to":5}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"c6","body":{"id":16492,"in_reply_to":5,"msg_id":5,"type":"generate_ok"}}
2025/02/14 20:29:21 Received {c6 n1 {"type":"generate","msg_id":6}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"n0","body":{"msg_id":6,"type":"get_counter"}}
2025/02/14 20:29:21 Received {n0 n1 {"id":16496,"in_reply_to":6}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"c6","body":{"id":16496,"in_reply_to":6,"msg_id":6,"type":"generate_ok"}}
2025/02/14 20:29:21 Received {c6 n1 {"type":"generate","msg_id":7}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"n0","body":{"msg_id":7,"type":"get_counter"}}
2025/02/14 20:29:21 Received {n0 n1 {"id":16500,"in_reply_to":7}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"c6","body":{"id":16500,"in_reply_to":7,"msg_id":7,"type":"generate_ok"}}
2025/02/14 20:29:21 Received {c6 n1 {"type":"generate","msg_id":8}}
2025/02/14 20:29:21 Sent {"src":"n1","dest":"n0","body":{"msg_id":8,"type":"get_counter"}}
2025/02/14 20:29:21 Received {n0 n1 {"id":16504,"in_reply_to":8}}
...
#cmd log
jepsen test runner - jepsen.core {:perf {:latency-graph {:valid? true},
:rate-graph {:valid? true},
:valid? true},
:timeline {:valid? true},
:exceptions {:valid? true},
:stats {:valid? true,
:count 24784,
:ok-count 24780,
:fail-count 0,
:info-count 4,
:by-f {:generate {:valid? true,
:count 24784,
:ok-count 24780,
:fail-count 0,
:info-count 4}}},
:availability {:valid? false, :ok-fraction 0.9998386},
:net {:all {:send-count 73884,
:recv-count 73880,
:msg-count 73884,
:msgs-per-op 2.9811168},
:clients {:send-count 49570,
:recv-count 49570,
:msg-count 49570},
:servers {:send-count 24314,
:recv-count 24310,
:msg-count 24314,
:msgs-per-op 0.9810361},
:valid? true},
:workload {:valid? true,
:attempted-count 24784,
:acknowledged-count 24780,
:duplicated-count 0,
:duplicated {},
:range [1 24780]},
:valid? false}