Skip to content

Commit 4704527

Browse files
committed
ecmult_impl: eliminate scratch memory used when generating context
1 parent 7f7a2ed commit 4704527

File tree

4 files changed

+99
-43
lines changed

4 files changed

+99
-43
lines changed

src/ecmult_impl.h

+99-16
Original file line numberDiff line numberDiff line change
@@ -137,24 +137,107 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
137137
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
138138
}
139139

140-
static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) {
141-
secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n);
142-
secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n);
143-
secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n);
140+
static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
141+
secp256k1_gej d;
142+
secp256k1_ge a_ge, d_ge, p_ge;
143+
secp256k1_ge last_ge;
144+
secp256k1_gej pj;
145+
secp256k1_fe zi;
146+
secp256k1_fe zr;
147+
secp256k1_fe dx_over_dz_squared;
144148
int i;
145149

146-
/* Compute the odd multiples in Jacobian form. */
147-
secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
148-
/* Convert them in batch to affine coordinates. */
149-
secp256k1_ge_set_table_gej_var(prea, prej, zr, n);
150-
/* Convert them to compact storage form. */
151-
for (i = 0; i < n; i++) {
152-
secp256k1_ge_to_storage(&pre[i], &prea[i]);
150+
VERIFY_CHECK(!a->infinity);
151+
152+
secp256k1_gej_double_var(&d, a, NULL);
153+
154+
/* First, we perform all the additions in an isomorphic curve obtained by multiplying
155+
* all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
156+
* `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
157+
* the resulting y-coordinate and the z-ratio, since we only have enough memory to
158+
* store two field elements. These are sufficient to efficiently undo the isomorphism
159+
* and recompute all the `x`s.
160+
*/
161+
d_ge.x = d.x;
162+
d_ge.y = d.y;
163+
d_ge.infinity = 0;
164+
165+
secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
166+
pj.x = a_ge.x;
167+
pj.y = a_ge.y;
168+
pj.z = a->z;
169+
pj.infinity = 0;
170+
171+
zr = d.z;
172+
secp256k1_fe_normalize_var(&zr);
173+
secp256k1_fe_to_storage(&pre[0].x, &zr);
174+
secp256k1_fe_normalize_var(&pj.y);
175+
secp256k1_fe_to_storage(&pre[0].y, &pj.y);
176+
177+
for (i = 1; i < n; i++) {
178+
secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
179+
secp256k1_fe_normalize_var(&zr);
180+
secp256k1_fe_to_storage(&pre[i].x, &zr);
181+
secp256k1_fe_normalize_var(&pj.y);
182+
secp256k1_fe_to_storage(&pre[i].y, &pj.y);
153183
}
154184

155-
free(prea);
156-
free(prej);
157-
free(zr);
185+
/* Map `pj` back to our curve by multiplying its z-coordinate by `d.z`. */
186+
secp256k1_fe_mul(&pj.z, &pj.z, &d.z);
187+
/* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
188+
* that we can combine it with the saved z-ratios to compute the other zs
189+
* without any more inversions. */
190+
secp256k1_fe_inv_var(&zi, &pj.z);
191+
secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
192+
secp256k1_ge_from_storage(&last_ge, &pre[n - 1]);
193+
secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
194+
195+
/* Compute the actual x-coordinate of D, which will be needed below. */
196+
secp256k1_fe_inv_var(&d.z, &d.z);
197+
secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
198+
secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
199+
200+
i = n - 1;
201+
while (i > 0) {
202+
secp256k1_fe zi2, zi3;
203+
i--;
204+
/* For the remaining points, we extract the z-ratio from the stored
205+
* x-coordinate, compute its z^-1 from that, and compute the full
206+
* point from that. The z-ratio for the next iteration is stored in
207+
* the x-coordinate at the end of the loop. */
208+
secp256k1_fe_mul(&zi, &zi, &last_ge.x);
209+
secp256k1_fe_sqr(&zi2, &zi);
210+
secp256k1_fe_mul(&zi3, &zi2, &zi);
211+
/* To compute the actual x-coordinate, we use the stored z ratio and
212+
* y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
213+
* in the loop above, as well as the inverse of the square of its
214+
* z-coordinate. We store the latter in the `zi2` variable, which is
215+
* computed iteratively starting from the overall Z inverse then
216+
* multiplying by each z-ratio in turn.
217+
*
218+
* Denoting the z-ratio as `rzr` (though the actual variable binding
219+
* is `last_ge.x`), we observe that it is equal to `h` from the inside
220+
* of the above `gej_add_ge_var` call. This satisfies
221+
*
222+
* rzr = d_x * z^2 - x
223+
*
224+
* where `d_x` is the x coordinate of `D` and `(x, z)` are Jacobian
225+
* coordinates of our desired point.
226+
*
227+
* Rearranging and dividing by `z^2` to convert to affine, we get
228+
*
229+
* x = d_x - rzr / z^2
230+
* = d_x - rzr * zi2
231+
*/
232+
secp256k1_fe_mul(&p_ge.x, &last_ge.x, &zi2);
233+
secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
234+
secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
235+
/* y is stored_y/z^3, as we expect */
236+
secp256k1_ge_from_storage(&last_ge, &pre[i]);
237+
secp256k1_fe_mul(&p_ge.y, &last_ge.y, &zi3);
238+
/* Store */
239+
secp256k1_ge_to_storage(&pre[i], &p_ge);
240+
}
158241
}
159242

160243
/** The following two macros retrieve a particular odd multiple from a table
@@ -202,7 +285,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
202285
ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
203286

204287
/* precompute the tables with odd multiples */
205-
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
288+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
206289

207290
#ifdef USE_ENDOMORPHISM
208291
{
@@ -216,7 +299,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
216299
for (i = 0; i < 128; i++) {
217300
secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
218301
}
219-
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
302+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
220303
}
221304
#endif
222305
}

src/group.h

-5
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a);
6767
/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
6868
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);
6969

70-
/** Set a batch of group elements equal to the inputs given in jacobian
71-
* coordinates (with known z-ratios). zr must contain the known z-ratios such
72-
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
73-
static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
74-
7570
/** Bring a batch of inputs given in jacobian coordinates (with known z-ratios) to
7671
* the same global z "denominator". zr must contain the known z-ratios such
7772
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y

src/group_impl.h

-18
Original file line numberDiff line numberDiff line change
@@ -167,24 +167,6 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
167167
}
168168
}
169169

170-
static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
171-
size_t i = len - 1;
172-
secp256k1_fe zi;
173-
174-
if (len > 0) {
175-
/* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */
176-
secp256k1_fe_inv(&zi, &a[i].z);
177-
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
178-
179-
/* Work out way backwards, using the z-ratios to scale the x/y values. */
180-
while (i > 0) {
181-
secp256k1_fe_mul(&zi, &zi, &zr[i]);
182-
i--;
183-
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
184-
}
185-
}
186-
}
187-
188170
static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr) {
189171
size_t i = len - 1;
190172
secp256k1_fe zs;

src/tests.c

-4
Original file line numberDiff line numberDiff line change
@@ -2095,24 +2095,20 @@ void test_ge(void) {
20952095
/* Test batch gej -> ge conversion with and without known z ratios. */
20962096
{
20972097
secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe));
2098-
secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
20992098
secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
21002099
for (i = 0; i < 4 * runs + 1; i++) {
21012100
/* Compute gej[i + 1].z / gej[i].z (with gej[n].z taken to be 1). */
21022101
if (i < 4 * runs) {
21032102
secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z);
21042103
}
21052104
}
2106-
secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1);
21072105
secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1);
21082106
for (i = 0; i < 4 * runs + 1; i++) {
21092107
secp256k1_fe s;
21102108
random_fe_non_zero(&s);
21112109
secp256k1_gej_rescale(&gej[i], &s);
2112-
ge_equals_gej(&ge_set_table[i], &gej[i]);
21132110
ge_equals_gej(&ge_set_all[i], &gej[i]);
21142111
}
2115-
free(ge_set_table);
21162112
free(ge_set_all);
21172113
free(zr);
21182114
}

0 commit comments

Comments
 (0)