Сравнение производительности доступа к полям структур в Python, Common Lisp и С++

c++, cоmmon lisp, freepascal, python, сравнение

0

2

Помните, я сказал, что постараюсь не писать до конца сентября? Так вот: у меня не получилось :) Кому не нравится тут тег Яр - заверяю, что он не для рекламы (и так все уже знают), а исключительно для удобства: мне по нему удобно искать, а вам его удобно заигнорить. Отношение к Яру здесь такое: я мучаюсь, как мне реализовать объекты Яра. Склоняюсь к тому, что сделать их с опцией «реализовать как defstruct/clos». Но дальше идут тонкости, в которые сейчас не лезу.

Итак, я нашёл тред пиписькометрии 7-летней давности: https://www.linux.org.ru/forum/development/4535326, и там сравнивалась скорость каких-то циклов, но это неинтересно. Интересно сравнивать хотя бы скорость доступа к полям объектов. Я попробовал. Вот что в итоге вышло (платформа 64-разрядная).

С++

// cpp-benchmark.cpp
#include "stdio.h"
#include <cstdlib>

// Node type used by the benchmark: five integer fields that the kernel
// copies into one another, plus a pointer to the node visited next.
class test_class {
public:
    int fld1;
    int fld2;
    int fld3;
    int fld4;
    int fld5;
    test_class *next;  // node the kernel moves to after this one
};


// Benchmark kernel. Runs `bound` iterations; each iteration reads and writes
// all five integer fields of the current node and then follows `next`.
// Returns the accumulator, which is reduced modulo 16 at the end of every
// iteration.
// NOTE: `o` and every node reached through `next` are dereferenced without a
// null check.
int inner(test_class *o,int bound) {
    int res=0;
    for (int i=0;i<bound;i++) {
        res += o->fld1;
        // Chain of dependent copies: fld1 -> fld2 -> fld3 -> fld4 -> fld5.
        o->fld2 = o->fld1;
        o->fld3 = o->fld2;
        o->fld4 = o->fld3;
        o->fld5 = o->fld4;
        o->fld1 = res - 1;
        o = o->next; // move on to the linked node
        res = res % 16; // reduce the accumulator modulo 16

    }    
    return res;
}

int main(int argc, char* argv[])
{
    test_class o1;
    test_class o2;
    o1.fld1=1;
    o2.fld1=1;
    o1.next=&o2;
    o2.next=&o1;
    int n = 100*1000*1000;
    int result=inner(&o1,n);
    printf("%d %d\n",o1.fld5,result); // проверяем корректность и чтобы оптимизатор
    // не выкинул неиспользуемый код
    return 0;
}
// EOF
// запускаем:
g++ -O2 -o cpp-benchmark cpp-benchmark.cpp ; echo disassemble inner | gdb cpp-benchmark ; time ./cpp-benchmark 
// листинг пропускаю.

real	0m0.225s
user	0m0.216s
sys	0m0.004s

Лисп на структурах:

;; struct-benchmark.lisp
(in-package :cl-user)

;; The #. reader macro calls RESTRICT-COMPILER-POLICY while this file is being
;; read; the surrounding LIST form only holds the quoted results and its value
;; is discarded.
;; NOTE(review): presumably this lifts any restriction on SAFETY and DEBUG so
;; that the DECLAIM below can lower them to 0 -- confirm against the
;; implementation's documentation.
(list
 '#.(restrict-compiler-policy 'safety)
 '#.(restrict-compiler-policy 'debug))
 
;; Global optimization policy: speed 3, every other quality 0.
(declaim
 (optimize (speed 3) (safety 0) (debug 0)
           (space 0) (compilation-speed 0)))

;; Iteration count that MAIN passes to INNER: 100 million.
(defparameter *times* (* 100 1000 1000))

;; Node type for the benchmark: a link to another TEST-STRUCT (or NIL)
;; followed by five FIXNUM slots, each defaulting to 0.
(defstruct test-struct 
  (next nil :type (or null test-struct))
  (fld1 0 :type fixnum)
  (fld2 0 :type fixnum)
  (fld3 0 :type fixnum)
  (fld4 0 :type fixnum)
  (fld5 0 :type fixnum)
  )

;; Request inlining of the five slot readers, their SETF functions and the
;; NEXT reader.
(declaim (inline test-struct-fld1 test-struct-fld2 test-struct-fld3 test-struct-fld4 test-struct-fld5 (setf test-struct-fld1) (setf test-struct-fld2) (setf test-struct-fld3) (setf test-struct-fld4) (setf test-struct-fld5)
                    test-struct-next))

;; Benchmark kernel. Runs N iterations; each iteration reads and writes all
;; five fixnum slots of the current node and then follows its NEXT link.
;; Returns the accumulator RES, reduced modulo 16 at the end of each
;; iteration.
(defun inner (o n)
  (declare (type test-struct o))
  (declare (type fixnum n))
  (let ((res 0))
    (declare (type fixnum res))
    (dotimes (i n)
      (incf res (the fixnum (test-struct-fld1 o)))
      ;; SETF processes its place/value pairs in order, so each copy sees the
      ;; value stored by the pair before it.
      (setf (test-struct-fld2 o) (test-struct-fld1 o)
            (test-struct-fld3 o) (test-struct-fld2 o)
            (test-struct-fld4 o) (test-struct-fld3 o)
            (test-struct-fld5 o) (test-struct-fld4 o)
            (test-struct-fld1 o) (- res 1)
            ;; Move on to the linked node, then reduce the accumulator.
            o (test-struct-next o)
            res (mod res 16)))
    res))

;; Builds two nodes with FLD1 = 1 that point at each other, runs INNER for
;; *TIMES* iterations and prints O1's FLD5 and the returned value, one per
;; line.
(defun main ()
  (let* ((o1 (make-test-struct :fld1 1))
         (o2 (make-test-struct :fld1 1 :next o1))
         res)
    ;; O1 did not exist when O2 was created, so the second link is set here.
    (setf (test-struct-next o1) o2)
    (setf res (inner o1 *times*))
    (format t "~S~%~S~%" (test-struct-fld5 o1) res)))

;; Run the benchmark under TIME, with *TRACE-OUTPUT* rebound to
;; *STANDARD-OUTPUT* for the duration of the call.
(let ((*trace-output* *standard-output*))
  (time (main)))
;;;; EOF
;; запускаем
>(load (compile-file "~/py/struct-benchmark.lisp"))
  0.394 seconds of real time
  0.436000 seconds of total run time (0.436000 user, 0.000000 system)

Лисп, но вместо inline ставим notinline — все аксессоры превращаются в полноценные функции. Получаем:

real time 3.879 seconds

Лисп на CLOS:

;; clos-benchmark-with-types.lisp
(in-package :cl-user)

;; The #. reader macro calls RESTRICT-COMPILER-POLICY while this file is being
;; read; the surrounding LIST form only holds the quoted results and its value
;; is discarded.
;; NOTE(review): presumably this lifts any restriction on SAFETY and DEBUG so
;; that the DECLAIM below can lower them to 0 -- confirm against the
;; implementation's documentation.
(list
 '#.(restrict-compiler-policy 'safety)
 '#.(restrict-compiler-policy 'debug))
 
;; Global optimization policy: speed 3, every other quality 0.
(declaim
 (optimize (speed 3) (safety 0) (debug 0)
           (space 0) (compilation-speed 0)))

;; Iteration count that MAIN passes to INNER: 100 million.
(defparameter *times* (* 100 1000 1000))

;; CLOS node type for the benchmark: five slots FLD1..FLD5 and a NEXT link,
;; each with a TEST-CLASS-3-<slot> accessor. No slot declares a type or an
;; initform; only FLD1 and NEXT accept an initarg.
(defclass test-class-3 ()
  ((fld1 :initarg :fld1 :accessor test-class-3-fld1)
   (fld2 :accessor test-class-3-fld2)
   (fld3 :accessor test-class-3-fld3)
   (fld4 :accessor test-class-3-fld4)
   (fld5 :accessor test-class-3-fld5)
   (next :initarg :next :accessor test-class-3-next)))

;; Benchmark kernel, CLOS variant. Runs N iterations; each iteration reads and
;; writes all five slots of the current instance through its accessors and
;; then follows NEXT. Prints the final accumulator and returns it.
(defun inner (o n)
  (declare (type fixnum n))
  (declare (type test-class-3 o))
  (let ((res 0))
    (declare (type fixnum res))
    (dotimes (i n)
      ;; The slots are untyped, so every value read is asserted to be a
      ;; fixnum with THE.
      (incf res (the fixnum (test-class-3-fld1 o)))
      ;; SETF processes its place/value pairs in order, so each copy sees the
      ;; value stored by the pair before it.
      (setf (test-class-3-fld2 o) (the fixnum (test-class-3-fld1 o))
            (test-class-3-fld3 o) (the fixnum (test-class-3-fld2 o))
            (test-class-3-fld4 o) (the fixnum (test-class-3-fld3 o))
            (test-class-3-fld5 o) (the fixnum (test-class-3-fld4 o))
            (test-class-3-fld1 o) (the fixnum (- res 1))
            o (test-class-3-next o)
            res (mod res 16)))
    ;; NOTE(review): MAIN prints this value again, so it appears twice in the
    ;; output; the struct-based listing has no such PRINT -- confirm intent.
    (print res)
    res))

;; Builds two instances with FLD1 = 1 that point at each other, runs INNER for
;; *TIMES* iterations and prints O1's FLD5 and the returned value, one per
;; line.
(defun main()
  (let* (
         (o1 (make-instance 'test-class-3 :fld1 1))
         (o2 (make-instance 'test-class-3 :fld1 1 :next o1))
         res)
    ;; O1 did not exist when O2 was created, so the second link is set here.
    (setf (test-class-3-next o1) o2)
    (setf res (inner o1 *times*))
    (format t "~S~%~S~%" (test-class-3-fld5 o1) res)))

;; Run the benchmark under TIME, with *TRACE-OUTPUT* rebound to
;; *STANDARD-OUTPUT* for the duration of the call.
(let ((*trace-output* *standard-output*))
  (time (main)))
#|
6.115 секунд
|#

python:

# ~/py/oop-benchmark.py
import time

# Number of loop iterations that my_main() passes to inner(): 100 million.
__times__ = 100*1000*1000

class TestClass3(object):
    """Benchmark node: five integer fields plus a link to another node."""

    def __init__(self):
        # fld1 starts at 1; the remaining four fields start at 0.
        self.fld1 = 1
        self.fld2 = self.fld3 = self.fld4 = self.fld5 = 0
        # A fresh node links to itself until the caller rewires `next`.
        self.next = self

def inner(o,count):
    """Benchmark loop: touch every field of the current node, then advance.

    Runs ``count`` iterations. Each iteration adds ``o.fld1`` to the
    accumulator, copies fld1 -> fld2 -> fld3 -> fld4 -> fld5, stores
    ``res - 1`` into ``o.fld1``, moves to ``o.next`` and reduces the
    accumulator modulo 16. Returns the accumulator.
    """
    res = 0
    # xrange is the Python 2 builtin; this listing targets Python 2.
    for i in xrange(count):
        res += o.fld1
        o.fld2 = o.fld1
        o.fld3 = o.fld2
        o.fld4 = o.fld3
        o.fld5 = o.fld4
        o.fld1 = res - 1
        o = o.next;
        res = res % 16
    return res

def my_main():
    """Link two nodes to each other, run the benchmark loop, print results.

    Prints ``o1.fld5`` and then the value returned by ``inner``, one per
    line.
    """
    o1 = TestClass3()
    o2 = TestClass3()
    o1.next = o2
    o2.next = o1
    res = inner(o1,__times__)
    # The parenthesised single-argument form is valid both as a Python 2
    # print statement and as a Python 3 print() call, and prints the same
    # text in either case.
    print('%s' % o1.fld5)
    print('%s' % res)

my_main()

# запуск:
#time python oop-benchmark.py 
#266666656
#3
#real	0m51.031s
#user	0m50.696s
#sys	0m0.052s

FreePascal

{oop-benchmark.fpc}
{$mode ObjFPC}
{ Iteration count passed to inner by the main program: 100 million. }
const n=100*1000*1000;
type
  PTest = ^TTest;
  { Benchmark node: five Integer fields and a pointer to another node. }
  TTest = object
  public
    fld1, fld2, fld3, fld4, fld5: Integer;
    next: PTest;
  end;

{ Benchmark kernel. Runs `bound` iterations; each one reads and writes all
  five fields of the current node and then advances o to that node's next.
  Returns the accumulator, reduced modulo 16 at the end of each iteration. }
function inner(o: PTest; bound: Integer): Integer;
var i: Integer;
begin
  Result:=0;
  { The with statement is the body of the for loop, so it is entered anew on
    every iteration with whatever node o points to at that moment. }
  for i:=0 to bound-1 do with o^ do begin
    Result:=Result+fld1;
    fld2:=fld1;
    fld3:=fld2;
    fld4:=fld3;
    fld5:=fld4;
    fld1:=Result-1;
    o:=next;
    Result:=Result mod 16;
  end;
end;

{ Main program: sets fld1 = 1 on two nodes, points each at the other, runs
  inner for n iterations and prints o1.fld5 and the returned value. }
var
  o1, o2: TTest;
  b: Integer;
begin
  o1.fld1:=1; o1.next:=@o2;
  o2.fld1:=1; o2.next:=@o1;
  b:=inner(@o1,n);
  WriteLn(o1.fld5,' ',b);
end.
{

fpc -O3 oop-benchmark.fpc
Free Pascal Compiler version 2.6.4+dfsg-4 [2014/10/14] for x86_64
Copyright (c) 1993-2014 by Florian Klaempfl and others
Target OS: Linux for x86-64
Compiling oop-benchmark.fpc
Linking oop-benchmark
/usr/bin/ld.bfd: warning: link.res contains output sections; did you forget -T?
36 lines compiled, 0.1 sec 
1 warning(s) issued
den73@deb8:~/py$ time ./oop-benchmark
266666656 3

real	0m1.810s
user	0m1.776s
sys	0m0.008s
}

cython, pypy

##########
# test.pyx file

import cython

# Benchmark node as a Cython extension type: five public `unsigned int`
# fields and a public `next` attribute declared as a generic Python object.
cdef class TestClass:

    cdef public unsigned int fld1, fld2, fld3, fld4, fld5
    cdef public object next
    
    # fld1 starts at 1, the other fields at 0; a fresh node links to itself
    # until the caller rewires `next`.
    def __cinit__(self):
        self.fld1 = 1
        self.fld2 = 0
        self.fld3 = 0
        self.fld4 = 0
        self.fld5 = 0
        self.next = self

# Benchmark kernel. Runs `count` iterations; each one reads and writes all
# five fields of the current node, moves to `o.next` and reduces the
# accumulator modulo 16. Returns the accumulator.
# The decorators set the `overflowcheck` directive to False and the
# `cdivision` directive to True for this function.
@cython.overflowcheck(False)
@cython.cdivision(True)
cdef inner(TestClass o, count):
    # Accumulator and loop index are C `unsigned int`; `count` is untyped.
    cdef unsigned int res = 0, i
    for i in range(count):
        res += o.fld1
        o.fld2 = o.fld1
        o.fld3 = o.fld2
        o.fld4 = o.fld3
        o.fld5 = o.fld4
        o.fld1 = res - 1
        # `next` is declared as `object`, while `o` is typed as TestClass.
        o = o.next;
        res = res % 16
    return res

# Entry point called from main.py: links two nodes to each other, runs
# `inner` for 100 million iterations and prints o1.fld5 and the returned
# value, one per line.
def main():
    cdef TestClass o1, o2
    o1 = TestClass()
    o2 = TestClass()
    o1.next = o2
    o2.next = o1
    res = inner(o1, 100_000_000)
    print('%s' % o1.fld5)
    print('%s' % res)

##########
# setup.py file

from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize

# to compile a module:
# python setup.py build_ext --inplace

# One extension module named `test`, built from test.pyx with -O3 passed to
# the C compiler.
extensions = [
    Extension('test', ['test.pyx'],
              extra_compile_args=['-O3'])
]

# The extension list goes through cythonize() with annotate=True before being
# handed to setup().
setup(name = 'access attrs benchmark',
      ext_modules = cythonize(extensions, annotate=True),
)

#########
# main.py file

# Import only the entry point rather than every public name of the compiled
# `test` module; `main` is the only name this script uses.
from test import main
main()

# Запуск с помощью pip 
# 1. Создать ТРИ файла, исходник которых дан выше
# 2. В этой директории:
python setup.py build_ext --inplace
time python main.py

# Результаты:
real	0m0.306s
user	0m0.304s
sys	0m0.004s

$ time pypy oop-benchmark.py 
266666656
3

real	0m0,761s
user	0m0,736s
sys	0m0,020s

EMACS Lisp - несколько быстрее Питона.

Ссылка

← 1 2 3 4 5 6 →

Ответ на: комментарий от Virtuos86 09.10.17 11:21:23 MSK

Ну для меня jit компилятор в 86 строк стал откровением. В своё время пытался разобраться в luajit и бросил. Хотя конечно c4 - игрушечный язык наподобие small c/pawn. Из типов только int, char и указатели. При добавлении других типов, массивов и динамической типизации всё сильно усложняется (или замедляется).

anonymous
(09.10.17 11:29:11 MSK)

Ответ на: комментарий от anonymous 09.10.17 11:29:11 MSK

Я бы даже посмотрел, но от Си совсем далёк и не тянет.

Virtuos86 ★★★★★
(09.10.17 11:34:27 MSK)

Похожие темы