首页 > 解决方案 > 如何在 C++ 中获得与 Fortran 相同的实数值精度(Parrel Studio XE 编译器)

问题描述

我有一个几乎很大的Fortran 77代码,我正在尝试用c++编写它。Fortran 代码有太多的数学公式,我必须在 c++ 中获得相同的参数值。

我在 Fortran 中有这样的代码:

  implicit real*8 (a-h,o-z)
  real *8 test
  test=3.14159**2
  print *,test

输出为: 9.86958772810000

在 c++ 代码中(我只使用 pow 作为示例,我在每个数学公式中都有这个问题):

//  1st Try
double test=pow(3.14159,2);
cout <<std::setprecision(std::numeric_limits<double>::digits10 + 1) <<fixed <<test;

输出为:9.86958885192871

我知道我可以通过像这样为种类选择器添加后缀来指定fp 编号的种类(但对于 fortran,我需要在 c++ 0 中获得相同的值:

real test=3.14159_8**2

如本问题中所述,C++ 和 Fortran 中的精度不同

我也在 C++ 中试过这个,输出是:

   //  2nd Try as users suggested in the comments
  float test2 = pow(3.14159, 2);

输出9.8695878982543945

如果我尝试:

   //  3rd Try as users suggested in the comments
  float test2 = pow(3.14159f, 2);

输出将是:9.8695888519287109

这仍然有差异。

** 我需要在 c++ 而不是 Fortran 中获得相同的值** 因为 Fortran 项目在整个项目中都使用此参数,并且我必须获得相同的输出。

那么无论如何我在c ++中获得相同的浮点/双精度吗?

任何帮助将不胜感激。(谢谢大家的帮助)。

正如 Kerndog73 问的那样,我试过了

std::numeric_limits<double>::digits // value is 53
std::numeric_limits<double>::is_iec559 //value is 1

PS:更多细节

这是我原始FORTRAN 代码的一部分,正如您所见,我需要在 c++ 中具有所有 10 个精度才能获得相同的值(此代码在代码末尾的文本文件中绘制一个形状,而我的 c++ 代码不相似到那个形状,因为精度值不一样):

  // in the last loop i have a value like this 9292780397998.33
 // all precision have used 

      dp=p2-p1
      dr=(r2-r1)/(real(gx-1))
      dfi=2*3.14159*zr/(real(gy-1))
      test=3.14159**2
      print *,test
      r11=r1
      print *,'dp , dr , dfi'
      print *,dp,dr,dfi
      do 11 i=1,gx
        r(i)=r11
        st(i)=dr*r(i)*dfi
        r11=r11+dr
         print *, r11,r(i),st(i)
 11   continue

      dh=h02-h01
      do 1 i=1,gx
        do 2 j=1,gy
          h0=h01+dh*(r(i)-r1)/(r2-r1)
          hkk=hk(i,j)
          if (hkk.eq.10) then
            hk(i,j)=hkkk
          end if
          h00=h0+hk(i,j)
          h(i,j)=h00/1000000.
          !print *, i,j, h(i,j)
          !print*, h(i,j)
 2      continue
 1    continue
!
!      write(30,501) '     '
 do 12 i=1,gx
        do 22 j=1,gy
          h3=h(i,j)**3
          h3r(i,j)=h3*r(i)
          h3ur(i,j)=h3/r(i)
          !print *,i,j, h3ur(i,j)
          p0(i,j)=p1+dp*(r(i)-r1)/(r2-r1)
           !print *,i,j, p0(i,j)
 22     continue
 12   continue

      drfi=dr/(dfi*48*zmu)
      dfir=dfi/(dr*48*zmu)
      omr=om*dr/8.
       print *,'drfi,dfir,omr,zmu'
      print *,drfi,dfir,omr,zmu
           !p1 = 10000
      !do 100 k=1,giter
        do 32 i=1,gx
          do 42 j=1,gy
            if (i.eq.1) then

              pp(i,j)=p1**2
              goto 242
            end if
            if (i.eq.gx) then
              pp(i,j)=p2**2
              goto 242
            end if
            if (j.eq.1.) then
                temp1=drfi*(2*h3ur(i,1)+h3ur(i,(gy-1))+h3ur(i,2))
              a=drfi*(2*h3ur(i,1)+h3ur(i,(gy-1))+h3ur(i,2))+
     &          dfir*(2*h3r(i,1)+h3r(i-1,1)+h3r(i+1,1))
     &          -omr*r(i)*(h(i,(gy-1))-h(i,2))/p0(i,1)

              b=drfi*(h3ur(i,1)+h3ur(i,(gy-1)))+
     &          omr*r(i)*(h(i,(gy-1))+h(i,1))/p0(i,(gy-1))

              c=drfi*(h3ur(i,1)+h3ur(i,2))-
     &          omr*r(i)*(h(i,1)+h(i,2))/p0(i,2)

              d=dfir*(h3r(i,1)+h3r(i-1,1))

              e=dfir*(h3r(i,1)+h3r(i+1,1))

              pp(i,j)=(b*p0(i,(gy-1))**2+c*p0(i,2)**2+
     &          d*p0(i-1,1)**2+e*p0(i+1,1)**2)/a

              goto 242
              end if

            if (j.eq.gy) then
              a=drfi*(2*h3ur(i,gy)+h3ur(i,(gy-1))+h3ur(i,2))+
     &          dfir*(2*h3r(i,gy)+h3r(i-1,gy)+h3r(i+1,gy))
     &          -omr*r(i)*(h(i,(gy-1))-h(i,2))/p0(i,gy)

              b=drfi*(h3ur(i,gy)+h3ur(i,(gy-1)))+
     &          omr*r(i)*(h(i,(gy-1))+h(i,gy))/p0(i,(gy-1))

              c=drfi*(h3ur(i,gy)+h3ur(i,2))-
     &          omr*r(i)*(h(i,gy)+h(i,2))/p0(i,2)

              d=dfir*(h3r(i,gy)+h3r(i-1,gy))

              e=dfir*(h3r(i,gy)+h3r(i+1,gy))

              pp(i,j)=(b*p0(i,(gy-1))**2+c*p0(i,2)**2+
     &          d*p0(i-1,gy)**2+e*p0(i+1,gy)**2)/a

              goto 242
            end if

            a=drfi*(2*h3ur(i,j)+h3ur(i,j-1)+h3ur(i,j+1))+
     &        dfir*(2*h3r(i,j)+h3r(i-1,j)+h3r(i+1,j))
     &        -omr*r(i)*(h(i,j-1)-h(i,j+1))/p0(i,j)

            b=drfi*(h3ur(i,j)+h3ur(i,j-1))+
     &        omr*r(i)*(h(i,j-1)+h(i,j))/p0(i,j-1)

            c=drfi*(h3ur(i,j)+h3ur(i,j+1))-
     &        omr*r(i)*(h(i,j)+h(i,j+1))/p0(i,j+1)

            d=dfir*(h3r(i,j)+h3r(i-1,j))

            e=dfir*(h3r(i,j)+h3r(i+1,j))

            pp(i,j)=(b*p0(i,j-1)**2+c*p0(i,j+1)**2+
     &        d*p0(i-1,j)**2+e*p0(i+1,j)**2)/a  

 242        continue

            ppp=pp(i,j)
            print *,ppp
            pneu=sqrt(ppp)
            palt=p0(i,j)
            p0(i,j)=palt+(pneu-palt)/2.
            !print *,p0(i,j)
            wt(i,j)=zmu*om*om*((r(i)+dr)**2+r(i)**2)/(2*h(i,j))
             !print *,r(i)
            p00(i,j)=p0(i,j)/100000.
            !print *, p00(i,j)
 42       continue
 32     continue

标签: c++floating-pointfortran

解决方案


我编写了一个程序以 3 种格式输出所有可能的结果,并在各种可能的时间对每种类型进行强制转换:

#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>

// use `volatile` extensively to inhibit "float store" optimizations

template<class T>
void pp(volatile T val)
{
    const size_t prec = std::numeric_limits<T>::digits10 + 1;
    std::cout << std::setprecision(prec);
    std::cout << std::left;
    std::cout << std::setfill('0');
    std::cout << std::setw(prec+2) << val;
}

int main()
{
    using L = long double;
    using D = double;
    using F = float;

    volatile L lp = 3.14159l;
    volatile D dp = 3.14159;
    volatile F fp = 3.14159f;

    volatile L lpl = lp;
    volatile D dpl = lp;
    volatile F fpl = lp;
    volatile L lpd = dp;
    volatile D dpd = dp;
    volatile F fpd = dp;
    volatile L lpf = fp;
    volatile D dpf = fp;
    volatile F fpf = fp;

    volatile L lpl2 = powl(lpl, 2);
    volatile D dpl2 = pow(dpl, 2);
    volatile F fpl2 = powf(fpl, 2);
    volatile L lpd2 = powl(lpd, 2);
    volatile D dpd2 = pow(dpd, 2);
    volatile F fpd2 = powf(fpd, 2);
    volatile L lpf2 = powl(lpf, 2);
    volatile D dpf2 = pow(dpf, 2);
    volatile F fpf2 = powf(fpf, 2);

    std::cout << "lpl2:  "; pp((L)lpl2); std::cout << "  "; pp((D)lpl2); std::cout << "  "; pp((F)lpl2); std::cout << '\n';
    std::cout << "dpl2:  "; pp((L)dpl2); std::cout << "  "; pp((D)dpl2); std::cout << "  "; pp((F)dpl2); std::cout << '\n';
    std::cout << "fpl2:  "; pp((L)fpl2); std::cout << "  "; pp((D)fpl2); std::cout << "  "; pp((F)fpl2); std::cout << '\n';
    std::cout << "lpd2:  "; pp((L)lpd2); std::cout << "  "; pp((D)lpd2); std::cout << "  "; pp((F)lpd2); std::cout << '\n';
    std::cout << "dpd2:  "; pp((L)dpd2); std::cout << "  "; pp((D)dpd2); std::cout << "  "; pp((F)dpd2); std::cout << '\n';
    std::cout << "fpd2:  "; pp((L)fpd2); std::cout << "  "; pp((D)fpd2); std::cout << "  "; pp((F)fpd2); std::cout << '\n';
    std::cout << "lpf2:  "; pp((L)lpf2); std::cout << "  "; pp((D)lpf2); std::cout << "  "; pp((F)lpf2); std::cout << '\n';
    std::cout << "dpf2:  "; pp((L)dpf2); std::cout << "  "; pp((D)dpf2); std::cout << "  "; pp((F)dpf2); std::cout << '\n';
    std::cout << "fpf2:  "; pp((L)fpf2); std::cout << "  "; pp((D)fpf2); std::cout << "  "; pp((F)fpf2); std::cout << '\n';

    return 0;
}

在我的 Linux 系统上,这会输出:

       long double           double             float
lpl2:  9.869587728100000000  9.869587728100001  9.869588
dpl2:  9.869587728099999069  9.869587728099999  9.869588
fpl2:  9.869588851928710938  9.869588851928711  9.869589
lpd2:  9.869587728099999262  9.869587728099999  9.869588
dpd2:  9.869587728099999069  9.869587728099999  9.869588
fpd2:  9.869588851928710938  9.869588851928711  9.869589
lpf2:  9.869588472080067731  9.869588472080068  9.869589
dpf2:  9.869588472080067731  9.869588472080068  9.869589
fpf2:  9.869588851928710938  9.869588851928711  9.869589

基于此,您可能显示的数字太少,但英特尔的 80 位格式long double在 Linux(以及我相信大多数 x86 操作系统)上,但在 Windows 上通常不可用。

您也可能使用十进制浮点数。

但是也有可能您的 Fortran 运行时完全被破坏了,许多 float<->string 库可以被慷慨地描述为完全和完全废话。

为了可靠性,使用十六进制浮点 I/O 是一个好习惯。


推荐阅读