diff --git a/ggml b/ggml
index a0fec8f..60f6f57 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit a0fec8ffa8b64fe67face8cc7d4af3dac370965d
+Subproject commit 60f6f57aef4abd2769936b0668d7ee563b6bdb22
diff --git a/rwkv_graph.inc b/rwkv_graph.inc
index e213f84..3de3b12 100644
--- a/rwkv_graph.inc
+++ b/rwkv_graph.inc
@@ -575,19 +575,11 @@ static bool rwkv_measure_and_build_serial_context(struct rwkv_model & model, str
 
     RWKV_ENSURE_OR_FALSE(rwkv_build_serial_graph(model, graph));
 
-    struct ggml_allocr * allocator = ggml_allocr_new_measure(tensor_alignment);
-
-    size_t required_context_size = ggml_allocr_alloc_graph(allocator, graph.cgraph.get()) +
+    size_t required_context_size = ggml_total_size_for_tensor_data(graph.ggml_ctx)
+        // With the node limit set 80K, this overhead would be 28 MB.
         + rwkv_ggml_overhead()
-        + tensor_alignment
-        // For some reason, `ggml_allocr_alloc_graph` underestimates required memory amount.
-        // Instead of diving deep into ggml internals to debug this issue, I will just add some padding.
-        // 40 MB seems to be enough for Raven 14B model when GGML_MAX_NODES is set to default value of 4096.
-        // TODO Check for v5
-        + size_t(40) * 1024 * 1024;
-
-    ggml_allocr_free(allocator);
+        + tensor_alignment;
+
     ggml_free(graph.ggml_ctx);
 
     // 2. Create the real ggml context.
@@ -724,19 +716,11 @@ static bool rwkv_measure_and_build_sequential_context(struct rwkv_model & model,
 
     RWKV_ENSURE_OR_FALSE(rwkv_build_sequential_graph(model, graph, sequence_length));
 
-    struct ggml_allocr * allocator = ggml_allocr_new_measure(tensor_alignment);
-
-    size_t required_context_size = ggml_allocr_alloc_graph(allocator, graph.cgraph.get()) +
+    size_t required_context_size = ggml_total_size_for_tensor_data(graph.ggml_ctx)
+        // With the node limit set 80K, this overhead would be 28 MB.
         + rwkv_ggml_overhead()
-        + tensor_alignment
-        // For some reason, `ggml_allocr_alloc_graph` underestimates required memory amount.
-        // Instead of diving deep into ggml internals to debug this issue, I will just add some padding.
-        // 40 MB per token seems to be enough for Raven 14B model. It works for sequence_length at least up to 71.
-        // TODO Check for v5 1.5B, 3B, 7B
-        + sequence_length * 64 * 1024 * 1024;
-
-    ggml_allocr_free(allocator);
+        + tensor_alignment;
+
     ggml_free(graph.ggml_ctx);
 
     // 2. Create the real ggml context.
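
Note (not part of the patch): with the `ggml_allocr` measuring pass removed, the required context size is now computed directly from the tensors recorded in the measuring context, plus the fixed graph bookkeeping overhead and one alignment's worth of padding. Below is a minimal sketch of how the three terms compose; `ggml_total_size_for_tensor_data`, `rwkv_ggml_overhead` and `tensor_alignment` are taken from the diff above and assumed to be in scope, while the wrapper function itself is hypothetical.

    // Hypothetical illustration only; mirrors the expression introduced in both hunks.
    static size_t estimate_required_context_size(struct ggml_context * measure_ctx) {
        return ggml_total_size_for_tensor_data(measure_ctx) // exact bytes needed for all tensor data
            + rwkv_ggml_overhead()                          // graph/object bookkeeping (~28 MB at the 80K node limit)
            + tensor_alignment;                             // worst-case padding for the first aligned allocation
    }

This replaces the old scheme of running `ggml_allocr_alloc_graph` on a measure allocator and padding the underestimated result by a guessed 40 MB (or 64 MB per token for sequence graphs).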