@@ -348,3 +348,140 @@ def rsqrt(ctx, seq, functional=False, size=None, niter=None):
348
348
def sqrt(ctx, seq, functional=False, size=None):
    """Square root of an FP expansion.

    The result is formed as the product of ``seq`` and its reciprocal
    square root, ``seq * rsqrt(seq)``. The ``functional`` and ``size``
    options are forwarded unchanged to both ``rsqrt`` and ``multiply``.
    """
    options = dict(functional=functional, size=size)
    reciprocal_root = rsqrt(ctx, seq, **options)
    return multiply(ctx, seq, reciprocal_root, **options)
351
+
352
+
353
def power(ctx, seq, n, functional=False, size=None):
    """
    n-th power of an FP expansion.

    The power is computed by repeated squaring: ``seq ** n`` is the
    square of ``seq ** (n // 2)``, multiplied by ``seq`` once more when
    n is odd.

    Parameters:
      ctx: context object; only ``ctx.constant`` is used directly (for
        n == 0), otherwise it is forwarded to ``square``/``multiply``.
      seq: the FP expansion, a sequence of terms.
      n: non-negative integer exponent.
      functional, size: forwarded unchanged to ``square`` and
        ``multiply``.

    Returns:
      For n == 0, a single-term expansion holding the constant 1
      created like ``seq[0]``; for n == 1, ``seq`` itself; otherwise
      the result of the ``square``/``multiply`` calls.

    Raises:
      AssertionError: when n is not a positive integer (other than the
        special cases 0, 1, 2 handled up front), or when n == 1 is
        requested with a ``size`` different from ``len(seq)`` (resizing
        the input is not implemented).
    """
    if n == 0:
        return [ctx.constant(1, seq[0])]
    if n == 1:
        assert size is None or len(seq) == size  # not impl
        return seq
    if n == 2:
        return square(ctx, seq, functional=functional, size=size)
    assert isinstance(n, int) and n > 0, n  # not impl
    half = n // 2
    if half == 1:
        # Only n == 3 reaches this point with half == 1. Use seq directly
        # instead of recursing into the n == 1 case: that case asserts
        # len(seq) == size, which is a restriction on the *final* result
        # only. Here seq is merely an intermediate operand and the
        # square() below applies the requested size itself.
        r = seq
    else:
        r = power(ctx, seq, half, functional=functional, size=size)
    sq = square(ctx, r, functional=functional, size=size)
    if n % 2 == 0:
        return sq
    return multiply(ctx, sq, seq, functional=functional, size=size)
370
+
371
+
372
def hypergeometric(ctx, a, b, seq, niter, functional=False, size=None):
    """
    Generalized hypergeometric series on an FP expansion:

      sum(prod((a[i])_n, i=0..p) / prod((b[i])_n, i=0..q) * z**n / n!, n=0,1,...,niter-1)

    where
      z is the value represented by the FP expansion seq
      p = len(a) - 1
      q = len(b) - 1
      (k)_n = 1 if n == 0 else (k)_{n-1} * (k + n - 1)
      n! = 1 if n == 0 else (n-1)! * n
    a and b are lists of integers or Fraction instances.

    When the leading term of seq is a numpy float16/float32/float64
    scalar, the series is evaluated for that floating-point type only.
    Otherwise the series is evaluated for all three types and the
    result is chosen with ctx.select based on the value returned by
    fpa.get_largest for the leading term.
    """
    import numpy

    def evaluate(dtype):
        # One series evaluation for the given floating-point type.
        return hypergeometric_impl(ctx, dtype, a, b, seq, niter, functional=functional, size=size)

    if isinstance(seq[0], (numpy.float64, numpy.float32, numpy.float16)):
        return evaluate(type(seq[0]))

    largest = fpa.get_largest(ctx, seq[0])
    as_fp64 = evaluate(numpy.float64)
    as_fp32 = evaluate(numpy.float32)
    as_fp16 = evaluate(numpy.float16)

    # NOTE(review): the thresholds presumably separate the largest finite
    # values of float64, float32, and float16 -- confirm against fpa.
    return ctx.select(largest > 1e308, as_fp64, ctx.select(largest > 1e38, as_fp32, as_fp16))
396
+
397
+
398
def hypergeometric_minus_one(ctx, a, b, seq, niter, functional=False, size=None):
    """
    Generalized hypergeometric series on an FP expansion minus one:

      sum(prod((a[i])_n, i=0..p) / prod((b[i])_n, i=0..q) * z**n / n!, n=1,...,niter-1)

    where
      z is the value represented by the FP expansion seq
      p = len(a) - 1
      q = len(b) - 1
      (k)_n = 1 if n == 0 else (k)_{n-1} * (k + n - 1)
      n! = 1 if n == 0 else (n-1)! * n
    a and b are lists of integers or Fraction instances.

    When the leading term of seq is a numpy float16/float32/float64
    scalar, the series is evaluated for that floating-point type only.
    Otherwise the series is evaluated for all three types and the
    result is chosen with ctx.select based on the value returned by
    fpa.get_largest for the leading term.
    """
    import numpy

    def evaluate(dtype):
        # One series evaluation for the given floating-point type.
        return hypergeometric_minus_one_impl(ctx, dtype, a, b, seq, niter, functional=functional, size=size)

    if isinstance(seq[0], (numpy.float64, numpy.float32, numpy.float16)):
        return evaluate(type(seq[0]))

    largest = fpa.get_largest(ctx, seq[0])
    as_fp64 = evaluate(numpy.float64)
    as_fp32 = evaluate(numpy.float32)
    as_fp16 = evaluate(numpy.float16)

    # NOTE(review): the thresholds presumably separate the largest finite
    # values of float64, float32, and float16 -- confirm against fpa.
    return ctx.select(largest > 1e308, as_fp64, ctx.select(largest > 1e38, as_fp32, as_fp16))
422
+
423
+
424
def hypergeometric_impl(ctx, dtype, a, b, seq, niter, functional=False, size=None):
    """
    Generalized hypergeometric series on an FP expansion for a given
    floating-point type dtype.

    The series without its leading term is obtained from
    hypergeometric_minus_one_impl and the leading term, dtype(1), is
    added to it as a single-term expansion.
    """
    options = dict(functional=functional, size=size)
    tail = hypergeometric_minus_one_impl(ctx, dtype, a, b, seq, niter, **options)
    one = [dtype(1)]
    return add(ctx, one, tail, **options)
427
+
428
+
429
def hypergeometric_minus_one_impl(ctx, dtype, a, b, seq, niter, functional=False, size=None):
    """
    Generalized hypergeometric series on an FP expansion minus one,
    evaluated for a given floating-point type dtype:

      sum(c[n] * z ** n, n=1..niter-1)

    where z is the value represented by the FP expansion seq, c[0] = 1,
    and c[n] = c[n-1] * R(n) with

      R(n) = prod(a[i] + n - 1, i) / (prod(b[j] + n - 1, j) * n)

    Parameters:
      ctx: context object forwarded to renormalize/multiply/add.
      dtype: floating-point type used to construct constants and the
        expansions of the ratios R(n).
      a, b: lists of integers or Fraction instances.
      seq: the FP expansion of the argument.
      niter: the series is summed over n = 1, ..., niter - 1.
      functional, size: forwarded unchanged to multiply and add.

    Returns:
      An FP expansion of the sum. When there are no terms to sum (for
      instance, niter <= 1 or R(1) is zero), the single-term expansion
      [dtype(0)] is returned.

    Raises:
      ZeroDivisionError: when b[j] + n - 1 is zero for some j before the
        series terminates (the ratio R(n) has a zero denominator).
    """
    import fractions
    import functional_algorithms as fa

    rcoeffs = []
    for n in range(1, niter):
        numer_ = 1
        denom_ = 1
        for a_ in a:
            numer_ *= a_ + n - 1
        for b_ in b:
            denom_ *= b_ + n - 1
        denom_ *= n

        if not numer_:
            # R(n) == 0: this and all the following terms vanish.
            break

        rc = fa.utils.fraction2expansion(dtype, fractions.Fraction(numer_, denom_))
        if not rc:
            # NOTE(review): presumably an empty expansion means that R(n)
            # is not representable as nonzero in dtype -- confirm against
            # fraction2expansion.
            break
        rcoeffs.append(renormalize(ctx, rc, functional=False))

    # hypergeometric series evaluation using Horner's scheme as
    #
    #   sum(c[n] * z ** n, n=0..niter-1)
    #   = c[0] + c[1] * z + c[2] * z ** 2 + c[3] * z ** 3 + ...
    #   = c[0] + (c[1] + (c[2] + (c[3] + ...) * z) * z) * z
    #   = fma(fma(fma(fma(..., z, c[3]), z, c[2]), z, c[1]), z, c[0])
    #
    # is inaccurate because c[n] is a rapidly decreasing sequence and
    # the given dtype may not have enough exponent range to represent
    # very small coefficients.
    #
    # In the following, we'll use the property of hypergeometric series
    # that the ratio of neighboring coefficients is a rational number so
    # that we have
    #
    #   c[n] * z ** n == (c[n-1] * z ** (n-1)) * z * R(n)
    #   c[0] = 1
    #
    # Hence
    #
    #   sum(c[n] * z ** n, n=0..niter-1)
    #   = c[0] + c[0] * z * R(1) + c[0] * z * R(1) * z * R(2) + ...
    #   = c[0] * (1 + z * R(1) * (1 + z * R(2) * (1 + z * R(3) * (1 + ...))))
    #   = 1 + z * R(1) * (1 + z * R(2) * (1 + z * R(3) * (1 + ...)))
    #
    # where R(n) is a slowly varying sequence in n. For instance, for
    # float16 hypergeometric([], [], z), R(n) = 1 / n is nonzero for n
    # over 2 ** 24, that is, the maximal value for user-specified
    # niter is practically unlimited.

    if not rcoeffs:
        # Empty sum: there is no R(1) to start the nested product from.
        return [dtype(0)]

    # Evaluate z * R(1) * (1 + z * R(2) * (1 + ...)) from the innermost
    # factor outwards. A loop is used rather than recursion so that the
    # number of terms is not bounded by the Python recursion limit.
    result = None
    for rc in reversed(rcoeffs):
        term = multiply(ctx, rc, seq, functional=functional, size=size) or [dtype(0)]
        if result is not None:
            h = add(ctx, [dtype(1)], result, functional=functional, size=size)
            term = multiply(ctx, term, h, functional=functional, size=size)
        result = term
    return result
0 commit comments