|
发表于 2024-3-27 05:43:13
|
显示全部楼层
another code- /*Hi again!Here is a simple recursive parallel gmp-chudnovsky program using Cilk or Cilk++.The Cilk/Cilk++ compiler is based on GNU C, with a few extra keywords to controlparallelism and works with the normal GNU GMP library. Using Cilk/Cilk++ instead of C with Pthreads or OpenMP makes recursive divide-and-conquer codeeasier to implement, debug (see cilkscreen), and maybe faster.More information about Cilk is at http://en.wikipedia.org/wiki/Cilk.Also, this simple version of gmp-chudnovsky has the factorization performance enhancement removed for readability, so running with one core will not be as fast as the Hanhong Xue's original version. Enjoy,David Carver*//* Pi computation using Chudnovsky's algortithm. * Copyright 2002,2005 Hanhong Xue (macroxue at yahoo dot com) * Slightly modified 2005 by Torbjorn Granlund to allow more than 2G digits to be computed. * Modified 2010 by David Carver (dcarver at tacc dot utexas dot edu) to demonstrate a parallel and fully recursive version of the gmp-chudnovsky program, using cilk/cilk++. To compile with GMP: cilk++ -Wall -O2 -o pgmp-chudnovsky pgmp-chudnovsky.cilk -lgmp -lm -lmiser To run: ./pgmp-chudnovsky 1000 1 or add the Cilk++ option "-cilk_set_worker_count=" to specify the number of workers (hardware threads used). ./pgmp-chudnovsky 1000 1 -cilk_set_worker_count=1 To get help run the program with no options: ./pgmp-chudnovsky Syntax: ./pgmp-chudnovsky.cilk.bin <digits> <option> <digits> digits of pi to output <option> 0 - just run (default) 1 - output decimal digits to stdout * Redistribution and use in source and binary forms,with or without * modification,are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES,INCLUDING,BUT NOT LIMITED TO,THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL, * SPECIAL,EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT,STRICT LIABILITY,OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */#include <assert.h>#include <math.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <time.h>#include <sys/time.h>#include <gmp.h>#include <cilk.h>#define A 13591409#define B 545140134#define C 640320#define D 12#define BITS_PER_DIGIT 3.32192809488736234787#define DIGITS_PER_ITER 14.1816474627254776555#define DOUBLE_PREC 53////////////////////////////////////////////////////////////////////////////double wall_clock(){ struct timeval timeval; (void) gettimeofday (&timeval,NULL); return (timeval.tv_sec + (timeval.tv_usec / 1000000.0));}/////////////////////////////////////////////////////////////////////////////* binary splitting */voidbs(unsigned long a,unsigned long b,unsigned gflag,mpz_t pstack1,mpz_t qstack1, mpz_t gstack1){ unsigned long mid; mpz_t pstack2,qstack2,gstack2; if (b-a==1) { /* g(b-1,b) = (6b-5)(2b-1)(6b-1) p(b-1,b) = b^3 * C^3 / 24 q(b-1,b) = (-1)^b*g(b-1,b)*(A+Bb). */ mpz_set_ui(pstack1,b); mpz_mul_ui(pstack1,pstack1,b); mpz_mul_ui(pstack1,pstack1,b); mpz_mul_ui(pstack1,pstack1,(C/24)*(C/24)); mpz_mul_ui(pstack1,pstack1,C*24); mpz_set_ui(gstack1,2*b-1); mpz_mul_ui(gstack1,gstack1,6*b-1); mpz_mul_ui(gstack1,gstack1,6*b-5); mpz_set_ui(qstack1,b); mpz_mul_ui(qstack1,qstack1,B); mpz_add_ui(qstack1,qstack1,A); mpz_mul (qstack1,qstack1,gstack1); if (b%2) mpz_neg(qstack1,qstack1); } else { /* p(a,b) = p(a,m) * p(m,b) g(a,b) = g(a,m) * g(m,b) q(a,b) = q(a,m) * p(m,b) + q(m,b) * g(a,m) */ mid = a+(b-a)*0.5224; /* tuning parameter */ mpz_init(pstack2); mpz_init(qstack2); mpz_init(gstack2); cilk_spawn bs(mid,b,gflag,pstack2,qstack2,gstack2); bs(a,mid,1,pstack1,qstack1,gstack1); cilk_sync; mpz_mul(pstack1,pstack1,pstack2); mpz_mul(qstack1,qstack1,pstack2); mpz_mul(qstack2,qstack2,gstack1); mpz_add(qstack1,qstack1,qstack2); if (gflag) { mpz_mul(gstack1,gstack1,gstack2); } mpz_clear(pstack2); mpz_clear(qstack2); mpz_clear(gstack2); }}intcilk_main(int argc,char *argv[]){ mpf_t pi,qi; mpz_t pstack,qstack,gstack; long int d=100,out=0,terms; unsigned long psize,qsize; char *prog_name; clock_t begin,end; double wbegin,wend; prog_name = argv[0]; if (argc==1) { fprintf(stderr,"\nSyntax: %s <digits> <option>\n",prog_name); fprintf(stderr," <digits> digits of pi to output\n"); fprintf(stderr," <option> 0 - just run (default)\n"); fprintf(stderr," 1 - output decimal digits to stdout\n"); exit(1); } if (argc>1) d = strtoul(argv[1],0,0); if (argc>2) out = atoi(argv[2]); terms = d/DIGITS_PER_ITER; fprintf(stderr,"#terms=%ld workers=%d\n",terms,cilk::current_worker_count()); begin = clock(); wbegin = wall_clock(); mpz_init(pstack); mpz_init(qstack); mpz_init(gstack); /* begin binary splitting process */ if (terms<=0) { mpz_set_ui(pstack,1); mpz_set_ui(qstack,0); mpz_set_ui(gstack,1); } else { bs(0,terms,0,pstack,qstack,gstack); } end = clock(); wend = wall_clock(); fprintf(stderr,"bs cputime = %6.3f wallclock = %6.3f\n", (double)(end-begin)/CLOCKS_PER_SEC,(wend-wbegin)); fflush(stderr); mpz_clear(gstack); /* prepare to convert integers to floats */ mpf_set_default_prec((long int)(d*BITS_PER_DIGIT+16)); /* p*(C/D)*sqrt(C) pi = ----------------- (q+A*p) */ psize = mpz_sizeinbase(pstack,10); qsize = mpz_sizeinbase(qstack,10); mpz_addmul_ui(qstack,pstack,A); mpz_mul_ui(pstack,pstack,C/D); mpf_init(pi); mpf_set_z(pi,pstack); mpz_clear(pstack); mpf_init(qi); mpf_set_z(qi,qstack); mpz_clear(qstack); /* final step */ mpf_div(qi,pi,qi); mpf_sqrt_ui(pi,C); mpf_mul(qi,qi,pi); end = clock(); wend = wall_clock(); /* output Pi and timing statistics */ fprintf(stderr,"total cputime = %6.3f wallclock = %6.3f\n", (double)(end-begin)/CLOCKS_PER_SEC,(wend-wbegin)); fflush(stderr); printf(" P size=%ld digits (%f)\n" " Q size=%ld digits (%f)\n", psize,(double)psize/d,qsize,(double)qsize/d); if (out&1) { printf("pi(0,%ld)=\n",terms); mpf_out_str(stdout,10,d+2,qi); printf("\n"); } /* free float resources */ mpf_clear(pi); mpf_clear(qi); exit (0);}
复制代码 |
|