用了“尾递归”,不一定有尾递归优化。优化要开启-O2选项才行!而且有其他约束条件哦。
c代码:
#include
#include
int geta(int n1, int n2, int n)
{
if(n<=1)
return n2;
else
{
// printf("&n1: %p\n&n2: %p\n&n:%p\n", &n1, &n2, &n);
return geta(n2,n1+n2,n-1);
}
}
#define N 30
#define ASD(x) #x
int main()
{
printf("The " ASD(N) " %d\n", geta(1,1,N));
return 0;
}
geta函数尾递归优化前:
(gdb) disas geta
Dump of assembler code for function geta:
0x080483e4 <+0>: push ebp
0x080483e5 <+1>: mov ebp,esp
0x080483e7 <+3>: sub esp,0x18
0x080483ea <+6>: cmp DWORD PTR [ebp+0x10],0x1
0x080483ee <+10>: jg 0x80483f5
0x080483f0 <+12>: mov eax,DWORD PTR [ebp+0xc]
0x080483f3 <+15>: jmp 0x8048416
0x080483f5 <+17>: mov eax,DWORD PTR [ebp+0x10]
0x080483f8 <+20>: lea edx,[eax-0x1]
0x080483fb <+23>: mov eax,DWORD PTR [ebp+0xc]
0x080483fe <+26>: mov ecx,DWORD PTR [ebp+0x8]
0x08048401 <+29>: add eax,ecx
0x08048403 <+31>: mov DWORD PTR [esp+0x8],edx
0x08048407 <+35>: mov DWORD PTR [esp+0x4],eax
0x0804840b <+39>: mov eax,DWORD PTR [ebp+0xc]
0x0804840e <+42>: mov DWORD PTR [esp],eax
0x08048411 <+45>: call 0x80483e4 // !!函数递归调用!!
0x08048416 <+50>: leave
0x08048417 <+51>: ret
End of assembler dump.
优化后:
(gdb) disas geta
Dump of assembler code for function geta:
0x08048450 <+0>: push ebx
0x08048451 <+1>: mov edx,DWORD PTR [esp+0x10]
0x08048455 <+5>: mov ecx,DWORD PTR [esp+0xc]
0x08048459 <+9>: mov ebx,DWORD PTR [esp+0x8]
0x0804845d <+13>: cmp edx,0x1
0x08048460 <+16>: mov eax,ecx
0x08048462 <+18>: jg 0x804846c
0x08048464 <+20>: jmp 0x8048477
0x08048466 <+22>: xchg ax,ax
0x08048468 <+24>: mov ebx,ecx
0x0804846a <+26>: mov ecx,eax
0x0804846c <+28>: sub edx,0x1
0x0804846f <+31>: cmp edx,0x1
0x08048472 <+34>: lea eax,[ebx+ecx*1]
0x08048475 <+37>: jne 0x8048468
0x08048477 <+39>: pop ebx
0x08048478 <+40>: ret
End of assembler dump.
优化后没有函数调用,栈帧不生长,相当于变递归为迭代了。
回到上一篇博客的例子,用尾递归计算Fibonacci,尾递归未优化,因此,尾递归虽然优化了时间性能(从指数级变为线性),但是空间复杂度依然是线性,比不上迭代版本的O(1)。这里加以优化,再对比。先再贴一遍代码:
PS:顺便提一下其他优化。
1. 不使用栈帧。call一个函数的时候,压入参数,push eip,然后就开始sub esp了,没有保存ebp和ebp切换的工作。
-fomit-frame-pointer
Dump of assembler code for function fib_rw(int, int, int):
0x080486e4 <+0>: sub esp,0x1c <----没有针对ebp的操作
0x080486e7 <+3>: cmp DWORD PTR [esp+0x28],0x1
0x080486ec <+8>: jg 0x80486f4
0x080486ee <+10>: mov eax,DWORD PTR [esp+0x24]
0x080486f2 <+14>: jmp 0x8048719
0x080486f4 <+16>: mov eax,DWORD PTR [esp+0x28]
0x080486f8 <+20>: lea edx,[eax-0x1]
0x080486fb <+23>: mov eax,DWORD PTR [esp+0x24]
0x080486ff <+27>: mov ecx,DWORD PTR [esp+0x20]
0x08048703 <+31>: add eax,ecx
0x08048705 <+33>: mov DWORD PTR [esp+0x8],edx
0x08048709 <+37>: mov DWORD PTR [esp+0x4],eax
0x0804870d <+41>: mov eax,DWORD PTR [esp+0x24]
0x08048711 <+45>: mov DWORD PTR [esp],eax
0x08048714 <+48>: call 0x80486e4
0x08048719 <+53>: add esp,0x1c
0x0804871c <+56>: ret
End of assembler dump.
2.循环展开。对编译时能确定执行几次的循环,进行展开。
-funroll-loops
3.尾递归单独的优化选项据说是
-foptimize-sibling-calls.
,但是我单独试了下好像没用。
测试:
#include
#include
#include
using namespace std;
//尾递归
int fib_rw(int a, int b, int n)
{
if(n<=1)return b;
// cout<<"a: "<<&a <<" b: "<<&b<<" n: "<<&n<=2)
N=atoi(argv[1]);
if(argc>=3)
TIMES=atoi(argv[2]);
struct timeval begin,end;
int re;
////////////////////////
gettimeofday(&begin,0); //尾递归
{
int i=TIMES;
while(--i)
{
re=fib_rw(1,1,N);
}
}
gettimeofday(&end,0);
if(begin.tv_usec>end.tv_usec)
{
end.tv_sec--;
end.tv_usec+=1000000;
}
cout<<"尾递归 "<end.tv_usec)
{
end.tv_sec--;
end.tv_usec+=1000000;
}
cout<<"迭代 "<
编译
chen@chen-book1:~$ g++ a.cpp -o a -O2
运行
chen@chen-book1:~$ ./a 5 1000
尾递归 8 time: 0 s 14 us
迭代 8 time: 0 s 12 us
chen@chen-book1:~$ ./a 10 1000
尾递归 89 time: 0 s 31 us
迭代 89 time: 0 s 24 us
chen@chen-book1:~$ ./a 15 1000
尾递归 987 time: 0 s 39 us
迭代 987 time: 0 s 37 us
chen@chen-book1:~$ ./a 20 1000
尾递归 10946 time: 0 s 51 us
迭代 10946 time: 0 s 49 us
chen@chen-book1:~$ ./a 25 1000
尾递归 121393 time: 0 s 61 us
迭代 121393 time: 0 s 62 us
chen@chen-book1:~$ ./a 30 1000
尾递归 1346269 time: 0 s 74 us
迭代 1346269 time: 0 s 74 us
chen@chen-book1:~$ ./a 35 1000
尾递归 14930352 time: 0 s 83 us
迭代 14930352 time: 0 s 87 us
chen@chen-book1:~$ ./a 40 1000
尾递归 165580141 time: 0 s 104 us
迭代 165580141 time: 0 s 100 us
chen@chen-book1:~$ ./a 45 1000
尾递归 1836311903 time: 0 s 106 us
迭代 1836311903 time: 0 s 112 us
蓝的是迭代版本(真直!)红的是尾递归,两者差不多吧~