comba乘法（性能优良）

公子相 · 发表于 2023-11-11 12:18:33

乘法是进行大数运算的基础，一个好的乘法实现，是我们梦寐以求的。
最简单的乘法就是硬乘法了，它也是最基础的乘法，然而硬乘法的时间复杂度是O(n2),当n较大时，这个时间消耗根本不能忍受，于是出现了karatsuba乘法，能将时间复杂度压缩到O(n1.57)。
由于karatsuba乘法需要调用硬乘法，所以就要想一些办法，把硬乘法做好。
通常的方案是：
      1）采用comba乘法。
      2）采用汇编语言。
      3）采用较大的进位制。
我在本站大整数乘法代码的帖子，本质就是满足上述3个条件的一个较好的实现。
现在来看，大整数乘法代码还能有提升的空间，经过实践，把[url=]大整数乘法代码[/url]优化如下：

   硬乘法：       乘数 [3] [2] [1] [0] //[]表示一个limb
            另一个乘数 [3] [2] [1] [0]

                     相乘的结果    [0][3] [0][2] [0][1] [0][0]
                                 [1][3] [1][2] [1][1] [1][0]
                        [2][3] [2][2] [2][1] [2][0]
               [3][3] [3][2] [3][1] [3][0]
-----------------------------------------------------------
                  7       6    5    4       3    2       1

每一次乘完的结果，不立即写在内存上，而是保存在寄存器中累加。
下面就是优化后的性能良好的comba乘法实现。

编译环境：Windows 10 ,visual studio 2022 ,debug+x86模式。切记是x86模式！

   函数原形： void Big_mul( UINT32 *result,const UINT32 * const left,const UINT32 * const right,int LL,int LR)
         其中：*result 乘积的结果。 * left 乘数。*right 另一个乘数。LL, *left的长度，LR， *right的长度。
         进制：采用10000 0000进制。十进制数高位对应数组较大的下标，十进制数的低位对应数组较小的下标。
   特别声明：*result 要用到的存贮空间，是* left 和*right 两个乘数的空间之和的两倍。
                     * left 和*right 两个乘数的空间，要至少保证有3位冗余。即LL+3、LR+3是数组的有效长度，防止访问越界。
                  * left 和*right 两个乘数的长度不能同时超过14700位十进制数。

_declspec(naked)void re_mul_abs(UINT32 *result, UINT32 *left, UINT32 *right, int LL, int LR){#undef LEFT_REG#undef RIGHT_REG#undef RESULT_REG#define LEFT_REG esi#define RIGHT_REG edi#define RESULT_REG ebp __asm { push esi push edi push ebx push ebp mov RIGHT_REG, dword ptr[esp + 0Ch + 16]; right mov ebx, dword ptr[esp + 14h + 16]; LLloop01: mov eax, dword ptr[esp + 14h + 16] sub eax, ebx mov ecx, dword ptr[esp + 10h + 16]; LL//ecx中的值是内循环的次数，每次进入内循环时取新值。 mov LEFT_REG, dword ptr[esp + 08h + 16]; left// LEFT--REGk中的值是内循环的乘数，每次进入内循环时取新值。 mov RESULT_REG, dword ptr[esp + 04h + 16]; result//RESULT_REG，结果，每次进入内循环时移位， lea RESULT_REG, [RESULT_REG + 8 * eax] push ebx loop00 : push ecx mov eax, dword ptr[LEFT_REG] // 第一次乘 mul dword ptr[RIGHT_REG] add dword ptr[RESULT_REG], eax adc dword ptr[RESULT_REG + 4], edx //------------------------------------------------------------------- mov eax, dword ptr[LEFT_REG + 4] // 第二次乘（一次） mul dword ptr[RIGHT_REG] mov ebx, edx mov ecx, eax mov eax, dword ptr[LEFT_REG]/// 第二次乘（二次） mul dword ptr[RIGHT_REG + 4] add ecx, eax adc ebx, edx //------------------------------------------------------------------ mov eax, dword ptr[LEFT_REG]///// 第三次乘( 1次） mul dword ptr[RIGHT_REG + 8] add dword ptr[RESULT_REG + 8], ecx // adc dword ptr[RESULT_REG + 12], ebx // 第二次乘的结果 mov ebx, edx mov ecx, eax mov eax, dword ptr[LEFT_REG + 4]///// 第三次乘（二次） mul dword ptr[RIGHT_REG + 4] add ecx, eax adc ebx, edx mov eax, dword ptr[LEFT_REG + 8]///// 第三次乘（三次） mul dword ptr[RIGHT_REG] add ecx, eax adc ebx, edx //--------------------------------------------------------- mov eax, dword ptr[LEFT_REG]///// 第四次乘( 1次） mul dword ptr[RIGHT_REG + 12] add dword ptr[RESULT_REG + 16], ecx // adc dword ptr[RESULT_REG + 20], ebx // 第三次乘的结果 mov ebx, edx mov ecx, eax mov eax, dword ptr[LEFT_REG + 4]///// 第四次乘（ 2次） mul dword ptr[RIGHT_REG + 8] add ecx, eax adc ebx, edx mov eax, dword ptr[LEFT_REG + 8]///// 第四次乘（ 3次） mul dword ptr[RIGHT_REG + 4] add ecx, eax adc ebx, edx mov eax, dword ptr[LEFT_REG + 12]///// 第四次乘（ 4次） mul dword ptr[RIGHT_REG] add ecx, eax adc ebx, edx //------------------------------------------------------ mov eax, dword ptr[LEFT_REG + 4]///// 第五次乘( 1次） mul dword ptr[RIGHT_REG + 12] add dword ptr[RESULT_REG + 24], ecx // adc dword ptr[RESULT_REG + 28], ebx // 第四次乘的结果 mov ebx, edx mov ecx, eax mov eax, dword ptr[LEFT_REG + 8]///// 第五次乘（ 2次） mul dword ptr[RIGHT_REG + 8] add ecx, eax adc ebx, edx mov eax, dword ptr[LEFT_REG + 12]///// 第五次乘（ 3次） mul dword ptr[RIGHT_REG + 4] add ecx, eax adc ebx, edx //----------------------------------------------------------- mov eax, dword ptr[LEFT_REG + 8]///// 第六次乘( 1次） mul dword ptr[RIGHT_REG + 12] add dword ptr[RESULT_REG + 32], ecx // adc dword ptr[RESULT_REG + 36], ebx // 第五次乘的结果 mov ebx, edx mov ecx, eax mov eax, dword ptr[LEFT_REG + 12]///// 第六次乘（ 2次） mul dword ptr[RIGHT_REG + 8] add ecx, eax adc ebx, edx //----------------------------------------------------------- mov eax, dword ptr[LEFT_REG + 12]///// 第七次乘( 1次） mul dword ptr[RIGHT_REG + 12] add dword ptr[RESULT_REG + 40], ecx // adc dword ptr[RESULT_REG + 44], ebx // 第六次乘的结果 add dword ptr[RESULT_REG + 48], eax adc dword ptr[RESULT_REG + 52], edx //第七次乘的结果 //----------------------------------------------------------- lea LEFT_REG, [LEFT_REG + 16] lea RESULT_REG, [RESULT_REG + 32] pop ecx sub ecx, 4 ja loop00;;///内循环。ja 如果ecx为正数则跳转 lea RIGHT_REG, [RIGHT_REG + 16] pop ebx sub ebx, 4 ja loop01//////外循环，ja 如果ebx为正数则跳转 //====================================================================// xor edi, edi mov ebx, BASE mov ecx, dword ptr[esp + 10h + 16] add ecx, dword ptr[esp + 14h + 16] mov RESULT_REG, dword ptr[esp + 04h + 16]; result loop02 : xor edx, edx xor eax, eax add edi, dword ptr[RESULT_REG] adc eax, dword ptr[RESULT_REG + 4] ///取本位的高32位做除法 div ebx add dword ptr [RESULT_REG+12], eax///商加在下一个数的高32位 mov eax, edi ///取本位数的低32位，与edx中的余数做除法 div ebx mov dword ptr[RESULT_REG], edx ///本位的余数就是结果。 mov edi, eax///本位的商，加在下一位的低32位 lea RESULT_REG, [RESULT_REG + 8] dec ecx jnz loop02 ///--------------------------------------------------------------------------------------------- mov ecx, dword ptr[esp + 10h + 16] add ecx, dword ptr[esp + 14h + 16] mov esi, ecx mov edx, 1 xor ebx, ebx mov RESULT_REG, dword ptr[esp + 04h + 16]; result loop04 : mov eax, dword ptr[RESULT_REG + 8 * edx] mov dword ptr[RESULT_REG + 4 * edx], eax add edx, 1 dec ecx jnz loop04//////// lea RESULT_REG, [RESULT_REG + 4 * edx] loop05: mov dword ptr[RESULT_REG], ebx lea RESULT_REG, [RESULT_REG + 4] dec esi jnz loop05//(/// loop 04\05循环，把结果移位，将多余的数清零 mov dword ptr[RESULT_REG], ebx //-----------------------------------------------------///*/// pop ebp pop ebx pop edi pop esi ret }/////*////}///*////

复制代码

inabilezami · 发表于 2023-11-11 12:19:16

程序第147行。BASE是10^8,进位制的基数。

ozjazit · 发表于 2023-11-11 12:20:15

现在有现成的大整数算法库，比hugecalc还牛逼很多，所以我建议你不要浪费时间搞这个了

Elishslots · 发表于 2023-11-11 12:20:31

你用什么编辑器来写汇编的？

uzmedafacof · 发表于 2023-11-11 12:21:28

估计你也有60了，少搞大整数乘法，浪费时间

inabilezami · 发表于 2023-11-11 12:22:28

泼冷水的坏习性

singqing · 发表于 2023-11-11 12:22:42

		自动登录	找回密码
密码			立即注册