low performance for aes7z
Closed this issue · 2 comments
I have an encrypted 7z file of about 60 MB containing 100+ files. Extracting it takes over 20 seconds on my computer. Tracing the code, I found that the key is recalculated every time a file is opened in sevenz.File.Open, which is very slow. I tried optimizing aes7z.calculateKey by caching the key (I guess all files in a 7z archive share the same key, but I can't confirm it), and it seems to work fine. My optimized aes7z/key.go is here:
package aes7z
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"sync"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// maxKeyCacheEntries bounds the number of derived keys kept in memory. Without
// a bound the cache (and the passwords it holds) would grow for the lifetime
// of the process whenever archives with different salts are opened.
const maxKeyCacheEntries = 16

// keyCacheItem is one cached key derivation: the inputs of calculateKey
// (password, cycles, salt) together with the key they produced.
type keyCacheItem struct {
	password string
	cycles   int
	salt     []byte
	key      []byte
}

// hittest reports whether the item was derived from exactly these inputs.
// The cheap integer comparison runs first; a nil and an empty salt compare
// equal.
func (c *keyCacheItem) hittest(password string, cycles int, salt []byte) bool {
	return c.cycles == cycles && c.password == password && bytes.Equal(salt, c.salt)
}

var (
	// keyCache holds the most recently derived keys, oldest first.
	keyCache []*keyCacheItem
	// keyCacheLock guards keyCache.
	keyCacheLock sync.RWMutex
)

// findKeyCached returns a copy of the cached key for the given inputs, or nil
// when no matching entry exists. A copy is returned so that a caller mutating
// the result cannot corrupt the cache or other callers' keys.
func findKeyCached(password string, cycles int, salt []byte) []byte {
	keyCacheLock.RLock()
	defer keyCacheLock.RUnlock()

	for _, kci := range keyCache {
		if kci.hittest(password, cycles, salt) {
			return append([]byte(nil), kci.key...)
		}
	}

	return nil
}

// recordKeyCached stores key as the result of deriving (password, cycles,
// salt). salt and key are copied, so the caller remains free to reuse or
// modify its slices afterwards. When the cache is full the oldest entry is
// dropped.
func recordKeyCached(password string, cycles int, salt []byte, key []byte) {
	item := &keyCacheItem{
		password: password,
		cycles:   cycles,
		salt:     append([]byte(nil), salt...),
		key:      append([]byte(nil), key...),
	}

	keyCacheLock.Lock()
	defer keyCacheLock.Unlock()

	// Two goroutines can both miss in findKeyCached and derive the same key;
	// only the first result needs to be kept.
	for _, kci := range keyCache {
		if kci.hittest(password, cycles, salt) {
			return
		}
	}

	if len(keyCache) >= maxKeyCacheEntries {
		// Evict the oldest entry, clearing the vacated slot so the evicted
		// item (and its password) can be garbage collected.
		copy(keyCache, keyCache[1:])
		keyCache[len(keyCache)-1] = nil
		keyCache = keyCache[:len(keyCache)-1]
	}

	keyCache = append(keyCache, item)
}
// calculateKey derives the sha256.Size-byte key for the given password,
// cycles value and salt, consulting the key cache first and recording any
// newly derived key in it.
//
// The derivation input is salt followed by the password encoded as UTF-16LE.
// When cycles is 0x3f that input is used directly as the key (truncated or
// zero-padded to sha256.Size bytes). Otherwise the key is the SHA-256 digest
// of 1<<cycles repetitions of the input, each followed by an 8-byte
// little-endian round counter.
func calculateKey(password string, cycles int, salt []byte) []byte {
	if k := findKeyCached(password, cycles, salt); len(k) > 0 {
		// key found in cache
		return k
	}

	// Copy salt into a fresh buffer. bytes.NewBuffer(salt) would take
	// ownership of the caller's slice, and appending the password could then
	// overwrite whatever follows salt in its backing array.
	var b bytes.Buffer
	b.Write(salt)

	// Convert password to UTF-16LE
	utf16le := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
	t := transform.NewWriter(&b, utf16le.NewEncoder())
	// The destination is an in-memory buffer, so the result is not checked.
	_, _ = t.Write([]byte(password))

	key := make([]byte, sha256.Size)

	if cycles == 0x3f {
		copy(key, b.Bytes())
	} else {
		// Lay out "salt || password || counter" once and only rewrite the
		// counter each round. This feeds the hash exactly the same bytes as
		// writing the two parts separately, without the per-iteration
		// encoding work of binary.Write.
		seed := b.Bytes()
		block := make([]byte, len(seed)+8)
		copy(block, seed)
		counter := block[len(seed):]

		h := sha256.New()
		for i := uint64(0); i < 1<<cycles; i++ {
			binary.LittleEndian.PutUint64(counter, i)
			// hash.Hash writes never return an error.
			_, _ = h.Write(block)
		}

		copy(key, h.Sum(nil))
	}

	recordKeyCached(password, cycles, salt, key)

	return key
}
My test code is here:
package main
import (
"io"
"os"
"time"
sevenz "github.com/bodgit/sevenzip"
)
// open7zArchive opens the 7z file at archive. A non-empty password is passed
// to the reader; an empty one opens the archive without a password.
func open7zArchive(archive, password string) (*sevenz.ReadCloser, error) {
	if password == "" {
		return sevenz.OpenReader(archive)
	}

	return sevenz.OpenReaderWithPassword(archive, password)
}
// extract7zItem writes a single archive entry to target. Directory entries
// are created with os.MkdirAll; any other entry is streamed into a file
// created with the entry's mode. An existing file at target is overwritten.
func extract7zItem(file *sevenz.File, target string) error {
	if file.FileInfo().IsDir() {
		return os.MkdirAll(target, file.Mode())
	}

	// Open the entry before creating the destination so that a failure here
	// does not leave an empty file behind.
	// Opening each file is where most of the time is spent.
	src, err := file.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	// O_TRUNC: without it, overwriting a longer existing file would keep the
	// old file's trailing bytes.
	dst, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, file.Mode())
	if err != nil {
		return err
	}

	if _, err := io.Copy(dst, src); err != nil {
		dst.Close()
		return err
	}

	// A failed Close on a written file can mean lost data, so report it.
	return dst.Close()
}
// extract7zWithCallback opens archive (using password when it is non-empty)
// and invokes handler once per entry, in archive order. Iteration stops at
// the first error returned by handler, and that error is returned. The
// archive is closed before returning.
func extract7zWithCallback(archive string, password string, handler func(*sevenz.File) error) error {
	rc, openErr := open7zArchive(archive, password)
	if openErr != nil {
		return openErr
	}
	defer rc.Close()

	entries := rc.File
	for i := 0; i < len(entries); i++ {
		if handleErr := handler(entries[i]); handleErr != nil {
			return handleErr
		}
	}

	return nil
}
// extract7zArchive extracts every entry of archive into the directory path,
// using password when it is non-empty. An empty path means the current
// directory. Entries whose name contains a ".." element are rejected with an
// *os.PathError wrapping os.ErrInvalid, because joining such a name onto path
// could write outside the destination directory.
func extract7zArchive(archive, password string, path string) error {
	// Indexing an empty path would panic; treat it as the current directory.
	if path == "" {
		path = "."
	}

	if last := path[len(path)-1]; last != '/' && last != '\\' {
		path += "/"
	}

	return extract7zWithCallback(archive, password, func(f *sevenz.File) error {
		// NOTE(review): entry names come from the archive and are untrusted
		// here; confirm whether the sevenzip package already sanitizes them.
		if hasDotDotElement(f.Name) {
			return &os.PathError{Op: "extract", Path: f.Name, Err: os.ErrInvalid}
		}

		return extract7zItem(f, path+f.Name)
	})
}

// hasDotDotElement reports whether name contains a ".." path element, treating
// both '/' and '\\' as separators.
func hasDotDotElement(name string) bool {
	start := 0

	for i := 0; i <= len(name); i++ {
		if i == len(name) || name[i] == '/' || name[i] == '\\' {
			if name[start:i] == ".." {
				return true
			}

			start = i + 1
		}
	}

	return false
}
// main extracts a hard-coded sample archive, prints any extraction error and
// the elapsed time in milliseconds, and exits with status 1 on failure.
func main() {
	start := time.Now()
	err := extract7zArchive("F:\\log4job\\日常工作\\2024\\03-29\\client_v2403.csp", "test", "F:\\log4job\\日常工作\\2024\\03-29\\client_v2403")
	elapsed := time.Since(start).Milliseconds()

	if err != nil {
		// The builtin println prints an error value as a raw pointer pair,
		// so print its message instead.
		println(err.Error())
	}

	println(elapsed, " ms cost")

	if err != nil {
		os.Exit(1)
	}
}
Extracting my 7z file takes 20 seconds without the cache and 5 seconds with the key cached.
Normally, files are all stored in a single stream, so the key should only need to be calculated once. However, if you enable encryption and then don't use compression, the files are added individually. I managed to reproduce the problem by creating an archive with the following:
7z a -mhc=on -mhe=on -ppassword -m0=copy archive.7z ...
You can confirm this with `7z l`:
Path = testdata/aes7z.7z
Type = 7z
Physical Size = 36552
Headers Size = 424
Method = Copy 7zAES
Solid = -
Blocks = 10
The `Solid = -` and `Blocks = 10` lines confirm this, versus another archive:
Path = aes7z.7z
Type = 7z
Physical Size = 6167
Headers Size = 311
Method = LZMA2:48k 7zAES
Solid = +
Blocks = 1
Both archives contain 10 files.
I have released v1.5.1 which improves the performance of AES key calculation.
Thanks for your detailed report!