История изменений
Исправление olegd, (текущая версия) :
попробуй numpy
numpy.loadtxt падает, если в строках получается разное число столбцов.
Попробовал для однократного чтения:
#!/usr/bin/python3
from sys import argv
import time
import numpy as np
# Benchmark several ways of turning a whitespace-delimited text file into a
# list of per-line token lists.  For every file named on the command line one
# wall-clock timing per strategy is printed.  The strategies are intentionally
# kept distinct (index assignment, append loop, comprehension, map + lambda,
# numpy) because comparing them is the purpose of the script -- do not
# "simplify" one into another.
for fil in argv[1:]:
    # Baseline: read the whole file and split it into lines only.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:  # close the handle explicitly instead of leaking it
        ar = f.read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print('ar.splitlines():', t1 - t0)

    # Strategy 1: overwrite every line in place with its token list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    # Index loop kept on purpose: in-place replacement by index is what is timed.
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    #print( lines )
    t1 = time.perf_counter()
    print('replace:', t1 - t0)

    # Strategy 2: append the token lists to a second, initially empty list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines2 = []
    # Explicit append loop kept on purpose: it is the strategy being timed.
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    #print( lines2 )
    t1 = time.perf_counter()
    print('append:', t1 - t0)

    # Strategy 3: list comprehension over the already split lines.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines = [l.split() for l in lines]
    #print( lines )
    t1 = time.perf_counter()
    print('[]:', t1 - t0)

    # Strategy 4: iterate the file object directly and map a lambda over it.
    t0 = time.perf_counter()
    with open(fil) as f:
        lines = [*map(lambda x: x.split(), f), ]
    #print(lines)
    t1 = time.perf_counter()
    print('lambda:', t1 - t0)

    # Strategy 5: let numpy parse the file into a 2-D array of strings.
    # NOTE: per the accompanying post, np.loadtxt fails when rows have
    # differing numbers of columns, so this step needs rectangular input.
    t0 = time.perf_counter()
    lines = np.loadtxt(fil, dtype=str, comments=None, delimiter=None, converters=None,
                       skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes')
    t1 = time.perf_counter()
    #print( lines )
    print('numpy:', t1 - t0)
На файл, который остальные варианты обрабатывали за 0,21–0,23 секунды, numpy.loadtxt затратила 2,5 с.
Исправление olegd, :
попробуй numpy
numpy.loadtxt падает, если в строках получается разное число столбцов.
Попробовал для однократного чтения:
#!/usr/bin/python3
from sys import argv
import time
import numpy as np
# Benchmark several ways of turning a whitespace-delimited text file into a
# list of per-line token lists.  For every file named on the command line one
# wall-clock timing per strategy is printed.  The strategies are intentionally
# kept distinct (index assignment, append loop, comprehension, map + lambda,
# numpy) because comparing them is the purpose of the script -- do not
# "simplify" one into another.
for fil in argv[1:]:
    # Baseline: read the whole file and split it into lines only.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:  # close the handle explicitly instead of leaking it
        ar = f.read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print('ar.splitlines():', t1 - t0)

    # Strategy 1: overwrite every line in place with its token list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    # Index loop kept on purpose: in-place replacement by index is what is timed.
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    #print( lines )
    t1 = time.perf_counter()
    print('replace:', t1 - t0)

    # Strategy 2: append the token lists to a second, initially empty list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines2 = []
    # Explicit append loop kept on purpose: it is the strategy being timed.
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    #print( lines2 )
    t1 = time.perf_counter()
    print('append:', t1 - t0)

    # Strategy 3: list comprehension over the already split lines.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines = [l.split() for l in lines]
    #print( lines )
    t1 = time.perf_counter()
    print('[]:', t1 - t0)

    # Strategy 4: iterate the file object directly and map a lambda over it.
    t0 = time.perf_counter()
    with open(fil) as f:
        lines = [*map(lambda x: x.split(), f), ]
    #print(lines)
    t1 = time.perf_counter()
    print('lambda:', t1 - t0)

    # Strategy 5: let numpy parse the file into a 2-D array of strings.
    # NOTE: per the accompanying post, np.loadtxt fails when rows have
    # differing numbers of columns, so this step needs rectangular input.
    t0 = time.perf_counter()
    lines = np.loadtxt(fil, dtype=str, comments=None, delimiter=None, converters=None,
                       skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes')
    t1 = time.perf_counter()
    #print( lines )
    print('numpy:', t1 - t0)
На файл, который остальные варианты обрабатывали за 0,21–0,22 секунды, numpy.loadtxt затратила 2,5 с.
Исходная версия olegd, :
попробуй numpy
numpy.loadtxt падает, если в строках получается разное число столбцов.
Попробовал для однократного чтения:
#!/usr/bin/python3
from sys import argv
import time
import numpy as np
# Benchmark several ways of turning a whitespace-delimited text file into a
# list of per-line token lists.  For every file named on the command line one
# wall-clock timing per strategy is printed.  The strategies are intentionally
# kept distinct (index assignment, append loop, comprehension, map + lambda,
# numpy) because comparing them is the purpose of the script -- do not
# "simplify" one into another.
for fil in argv[1:]:
    # Baseline: read the whole file and split it into lines only.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:  # close the handle explicitly instead of leaking it
        ar = f.read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print('ar.splitlines():', t1 - t0)

    # Strategy 1: overwrite every line in place with its token list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    # Index loop kept on purpose: in-place replacement by index is what is timed.
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    #print( lines )
    t1 = time.perf_counter()
    print('replace:', t1 - t0)

    # Strategy 2: append the token lists to a second, initially empty list.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines2 = []
    # Explicit append loop kept on purpose: it is the strategy being timed.
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    #print( lines2 )
    t1 = time.perf_counter()
    # Distinct label so this result cannot be confused with the in-place variant.
    print('append:', t1 - t0)

    # Strategy 3: list comprehension over the already split lines.
    t0 = time.perf_counter()
    with open(fil, 'rt') as f:
        ar = f.read()
    lines = ar.splitlines()
    lines = [l.split() for l in lines]
    #print( lines )
    t1 = time.perf_counter()
    print('[]:', t1 - t0)

    # Strategy 4: iterate the file object directly and map a lambda over it.
    t0 = time.perf_counter()
    with open(fil) as f:
        lines = [*map(lambda x: x.split(), f), ]
    #print(lines)
    t1 = time.perf_counter()
    print('lambda:', t1 - t0)

    # Strategy 5: let numpy parse the file into a 2-D array of strings.
    # NOTE: per the accompanying post, np.loadtxt fails when rows have
    # differing numbers of columns, so this step needs rectangular input.
    t0 = time.perf_counter()
    lines = np.loadtxt(fil, dtype=str, comments=None, delimiter=None, converters=None,
                       skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes')
    t1 = time.perf_counter()
    #print( lines )
    print('numpy:', t1 - t0)
На файл, который остальные варианты обрабатывали за 0,21–0,22 секунды, numpy.loadtxt затратила 2,5 с.