C++

#include <cstdio>
int r=100;

// Counts the integer points (a,b,c,d), each coordinate in [-r, r], that
// satisfy a^2 + b^2 + c^2 + d^2 <= r^2, then prints the count (no newline).
int main()
{
    int inside = 0;

    for (int a = -r; a <= r; ++a) {
        for (int b = -r; b <= r; ++b) {
            for (int c = -r; c <= r; ++c) {
                for (int d = -r; d <= r; ++d) {
                    // Point is inside or on the boundary of the 4-D ball.
                    if (a*a + b*b + c*c + d*d <= r*r) {
                        ++inside;
                    }
                }
            }
        }
    }

    printf("%d", inside);
}

julia

# Count the integer points (a,b,c,d), each coordinate in -r:r, that satisfy
# a*a+b*b+c*c+d*d <= r*r, and print the count.
# Everything runs at top level of the script: r and cnt are not inside any
# function, and the four ranges are iterated by one multi-range `for`.
# NOTE(review): the surrounding note mentions the 0.5-era docs; newer Julia
# versions scope assignments like `cnt=cnt+1` inside a top-level loop in a
# file differently -- confirm before rerunning on a newer version.
r,cnt=100,0
for a=-r:r,b=-r:r,c=-r:r,d=-r:r
# Increment when the point lies inside or on the 4-D ball of radius r.
if a*a+b*b+c*c+d*d-r*r<=0 cnt=cnt+1 end
end
println(cnt)

比较

D:\>g++ p596.cpp -O3

D:\>a\timer a

Timer 3.01  Copyright (c) 2002-2003 Igor Pavlov  2003-07-10
493490641
Kernel Time  =     0.015 = 00:00:00.015 =   0%
User Time    =     1.544 = 00:00:01.544 =  49%
Process Time =     1.560 = 00:00:01.560 =  50%
Global Time  =     3.105 = 00:00:03.105 = 100%

julia> @time include("d:\\p596.jl")
493490641
757.315780 seconds (12.00 G allocations: 101.719 GiB, 3.11% gc time)

不明白嵌套循环为什么分配这么多内存
改用while循环

# Same lattice-point count as the `for` version, written as four nested
# `while` loops with hand-maintained counters a, b, c, d.
# Still runs at top level of the script (no enclosing function), so r, cnt
# and the loop counters are all assigned outside any function.
# NOTE(review): r is 100 here, while the timings quoted after this snippet
# are stated to be for r=20 -- r was presumably edited before that run.
r,cnt=100,0
a=-r
while a<=r
# Each inner counter is reset to -r on every pass of its enclosing loop.
b=-r
while b<=r
c=-r
while c<=r
d=-r
while d<=r
# Increment when the point lies inside or on the 4-D ball of radius r.
if a*a+b*b+c*c+d*d-r*r<=0 cnt=cnt+1 end
d=d+1
end
c=c+1
end
b=b+1
end
a=a+1
end

println(cnt)

在 r=20 时,while 版本的内存分配和耗时都比 for 版本小一些

julia> @time include("d:\\p596a.jl")  #for
789905
  1.263037 seconds (5.57 M allocations: 65.239 MiB, 0.63% gc time)

julia> @time include("d:\\p596b.jl") #while
789905
  1.044400 seconds (2.60 M allocations: 19.921 MiB, 0.37% gc time)

python(用 PyPy 运行)的表现很正常

# Count the integer points (a, b, c, d), each coordinate in [-r, r], that
# satisfy a^2 + b^2 + c^2 + d^2 <= r^2, then print the count.
r = 100
cnt = 0
coords = range(-r, r + 1)  # every candidate coordinate, -r through r inclusive
for a in coords:
    for b in coords:
        for c in coords:
            for d in coords:
                # Point is inside or on the boundary of the 4-D ball.
                if a*a + b*b + c*c + d*d <= r*r:
                    cnt += 1
print(cnt)

D:\>a\timer \pypy256\pypy p596.py

Timer 3.01  Copyright (c) 2002-2003 Igor Pavlov  2003-07-10
493490641

Kernel Time  =     0.078 = 00:00:00.078 =   0%
User Time    =    10.218 = 00:00:10.218 =  87%
Process Time =    10.296 = 00:00:10.296 =  87%
Global Time  =    11.716 = 00:00:11.716 = 100%

联想到上次把python改写成julia的经过,把上述算法包含在一个函数里,然后调用函数,速度极大地提高,这个问题得解。

# Count the integer points (a,b,c,d), each coordinate in -r:r, that satisfy
# a*a+b*b+c*c+d*d <= r*r, and print the count.
# The whole computation lives inside a function, so the radius, the counter
# and the range are local variables rather than globals.
function f5()
    r = 100
    total = 0
    span = -r:r
    for a in span, b in span, c in span, d in span
        # Point is inside or on the boundary of the 4-D ball.
        if a*a + b*b + c*c + d*d - r*r <= 0
            total += 1
        end
    end
    println(total)
end
# Time a single call of the function.
@time f5()
-----
493490641
  1.334010 seconds (6.82 k allocations: 205.733 KiB)

Julia 0.5.1 文档(Performance Tips)中提到

Avoid global variables
A global variable might have its value, and therefore its type, change at any point. This makes it difficult for the compiler to optimize code using global variables. Variables should be local, or passed as arguments to functions, whenever possible.
Any code that is performance critical or being benchmarked should be inside a function.

避免全局变量
全局变量的值及其类型随时都可能变化,这使编译器难以优化使用全局变量的代码。应尽可能使用局部变量,或者把变量当做参数传递给函数。
对性能至关重要的代码,或者要做基准测试的代码,都应放入函数中。