Browse Source

update deps

pull/279/head
tnextday 10 years ago
parent
commit
7aae3661ee
  1. 5
      Makefile
  2. 5
      weed/Godeps/Godeps.json
  3. 24
      weed/vendor/github.com/klauspost/crc32/.gitignore
  4. 12
      weed/vendor/github.com/klauspost/crc32/.travis.yml
  5. 28
      weed/vendor/github.com/klauspost/crc32/LICENSE
  6. 84
      weed/vendor/github.com/klauspost/crc32/README.md
  7. 186
      weed/vendor/github.com/klauspost/crc32/crc32.go
  8. 62
      weed/vendor/github.com/klauspost/crc32/crc32_amd64.go
  9. 237
      weed/vendor/github.com/klauspost/crc32/crc32_amd64.s
  10. 40
      weed/vendor/github.com/klauspost/crc32/crc32_amd64p32.go
  11. 67
      weed/vendor/github.com/klauspost/crc32/crc32_amd64p32.s
  12. 29
      weed/vendor/github.com/klauspost/crc32/crc32_generic.go

5
Makefile

@ -6,15 +6,12 @@ SOURCE_DIR = ./weed
all: build all: build
.PHONY : clean deps build linux vet
.PHONY : clean godep build linux vet
clean: clean:
go clean -i $(GO_FLAGS) $(SOURCE_DIR) go clean -i $(GO_FLAGS) $(SOURCE_DIR)
rm -f $(BINARY) rm -f $(BINARY)
deps:
go get $(GO_FLAGS) -d $(SOURCE_DIR)
fmt: fmt:
gofmt -w -s $(SOURCE_DIR) gofmt -w -s $(SOURCE_DIR)

5
weed/Godeps/Godeps.json

@ -83,6 +83,11 @@
"ImportPath": "github.com/hailocab/go-hostpool", "ImportPath": "github.com/hailocab/go-hostpool",
"Rev": "0637eae892be221164aff5fcbccc57171aea6406" "Rev": "0637eae892be221164aff5fcbccc57171aea6406"
}, },
{
"ImportPath": "github.com/klauspost/crc32",
"Comment": "v1.0",
"Rev": "19b0b332c9e4516a6370a0456e6182c3b5036720"
},
{ {
"ImportPath": "github.com/pierrec/lz4", "ImportPath": "github.com/pierrec/lz4",
"Rev": "0b67ae4bb1ab03691079e38dddbc3909d68de64f" "Rev": "0b67ae4bb1ab03691079e38dddbc3909d68de64f"

24
weed/vendor/github.com/klauspost/crc32/.gitignore

@ -0,0 +1,24 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

12
weed/vendor/github.com/klauspost/crc32/.travis.yml

@ -0,0 +1,12 @@
language: go
go:
- 1.3
- 1.4
- 1.5
- 1.6
- tip
script:
- go test -v .
- go test -v -race .

28
weed/vendor/github.com/klauspost/crc32/LICENSE

@ -0,0 +1,28 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2015 Klaus Post
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

84
weed/vendor/github.com/klauspost/crc32/README.md

@ -0,0 +1,84 @@
# crc32
CRC32 hash with x64 optimizations
This package is a drop-in replacement for the standard library `hash/crc32` package, that features SSE 4.2 optimizations on x64 platforms, for a 10x speedup.
[![Build Status](https://travis-ci.org/klauspost/crc32.svg?branch=master)](https://travis-ci.org/klauspost/crc32)
# usage
Install using `go get github.com/klauspost/crc32`. This library is based on Go 1.5 code and requires Go 1.3 or newer.
Replace `import "hash/crc32"` with `import "github.com/klauspost/crc32"` and you are good to go.
# changes
* Dec 4, 2015: Uses the "slice-by-8" trick more extensively, which gives a 1.5 to 2.5x speedup if assembler is unavailable.
# performance
For IEEE tables (the most common), there is approximately a factor 10 speedup with "CLMUL" (Carryless multiplication) instruction:
```
benchmark old ns/op new ns/op delta
BenchmarkCrc32KB 99955 10258 -89.74%
benchmark old MB/s new MB/s speedup
BenchmarkCrc32KB 327.83 3194.20 9.74x
```
For other tables and "CLMUL" capable machines the performance is the same as the standard library.
Here are some detailed benchmarks, comparing to go 1.5 standard library with and without assembler enabled.
```
Std: Standard Go 1.5 library
Crc: Indicates IEEE type CRC.
40B: Size of each slice encoded.
NoAsm: Assembler was disabled (ie. not an AMD64 or SSE 4.2+ capable machine).
Castagnoli: Castagnoli CRC type.
BenchmarkStdCrc40B-4 10000000 158 ns/op 252.88 MB/s
BenchmarkCrc40BNoAsm-4 20000000 105 ns/op 377.38 MB/s (slice8)
BenchmarkCrc40B-4 20000000 105 ns/op 378.77 MB/s (slice8)
BenchmarkStdCrc1KB-4 500000 3604 ns/op 284.10 MB/s
BenchmarkCrc1KBNoAsm-4 1000000 1463 ns/op 699.79 MB/s (slice8)
BenchmarkCrc1KB-4 3000000 396 ns/op 2583.69 MB/s (asm)
BenchmarkStdCrc8KB-4 200000 11417 ns/op 717.48 MB/s (slice8)
BenchmarkCrc8KBNoAsm-4 200000 11317 ns/op 723.85 MB/s (slice8)
BenchmarkCrc8KB-4 500000 2919 ns/op 2805.73 MB/s (asm)
BenchmarkStdCrc32KB-4 30000 45749 ns/op 716.24 MB/s (slice8)
BenchmarkCrc32KBNoAsm-4 30000 45109 ns/op 726.42 MB/s (slice8)
BenchmarkCrc32KB-4 100000 11497 ns/op 2850.09 MB/s (asm)
BenchmarkStdNoAsmCastagnol40B-4 10000000 161 ns/op 246.94 MB/s
BenchmarkStdCastagnoli40B-4 50000000 28.4 ns/op 1410.69 MB/s (asm)
BenchmarkCastagnoli40BNoAsm-4 20000000 100 ns/op 398.01 MB/s (slice8)
BenchmarkCastagnoli40B-4 50000000 28.2 ns/op 1419.54 MB/s (asm)
BenchmarkStdNoAsmCastagnoli1KB-4 500000 3622 ns/op 282.67 MB/s
BenchmarkStdCastagnoli1KB-4 10000000 144 ns/op 7099.78 MB/s (asm)
BenchmarkCastagnoli1KBNoAsm-4 1000000 1475 ns/op 694.14 MB/s (slice8)
BenchmarkCastagnoli1KB-4 10000000 146 ns/op 6993.35 MB/s (asm)
BenchmarkStdNoAsmCastagnoli8KB-4 50000 28781 ns/op 284.63 MB/s
BenchmarkStdCastagnoli8KB-4 1000000 1029 ns/op 7957.89 MB/s (asm)
BenchmarkCastagnoli8KBNoAsm-4 200000 11410 ns/op 717.94 MB/s (slice8)
BenchmarkCastagnoli8KB-4 1000000 1000 ns/op 8188.71 MB/s (asm)
BenchmarkStdNoAsmCastagnoli32KB-4 10000 115426 ns/op 283.89 MB/s
BenchmarkStdCastagnoli32KB-4 300000 4065 ns/op 8059.13 MB/s (asm)
BenchmarkCastagnoli32KBNoAsm-4 30000 45171 ns/op 725.41 MB/s (slice8)
BenchmarkCastagnoli32KB-4 500000 4077 ns/op 8035.89 MB/s (asm)
```
The IEEE assembler optimizations has been submitted and will be part of the Go 1.6 standard library.
However, the improved use of slice-by-8 has not, but will probably be submitted for Go 1.7.
# license
Standard Go license. Changes are Copyright (c) 2015 Klaus Post under same conditions.

186
weed/vendor/github.com/klauspost/crc32/crc32.go

@ -0,0 +1,186 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package crc32 implements the 32-bit cyclic redundancy check, or CRC-32,
// checksum. See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for
// information.
//
// Polynomials are represented in LSB-first form also known as reversed representation.
//
// See http://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks#Reversed_representations_and_reciprocal_polynomials
// for information.
package crc32
import (
"hash"
"sync"
)
// The size of a CRC-32 checksum in bytes.
const Size = 4
// Predefined polynomials.
const (
// IEEE is by far and away the most common CRC-32 polynomial.
// Used by ethernet (IEEE 802.3), v.42, fddi, gzip, zip, png, ...
IEEE = 0xedb88320
// Castagnoli's polynomial, used in iSCSI.
// Has better error detection characteristics than IEEE.
// http://dx.doi.org/10.1109/26.231911
Castagnoli = 0x82f63b78
// Koopman's polynomial.
// Also has better error detection characteristics than IEEE.
// http://dx.doi.org/10.1109/DSN.2002.1028931
Koopman = 0xeb31d82e
)
// Table is a 256-word table representing the polynomial for efficient processing.
type Table [256]uint32
// castagnoliTable points to a lazily initialized Table for the Castagnoli
// polynomial. MakeTable will always return this value when asked to make a
// Castagnoli table so we can compare against it to find when the caller is
// using this polynomial.
var castagnoliTable *Table
var castagnoliTable8 *slicing8Table
var castagnoliOnce sync.Once
func castagnoliInit() {
castagnoliTable = makeTable(Castagnoli)
castagnoliTable8 = makeTable8(Castagnoli)
}
// IEEETable is the table for the IEEE polynomial.
var IEEETable = makeTable(IEEE)
// slicing8Table is array of 8 Tables
type slicing8Table [8]Table
// ieeeTable8 is the slicing8Table for IEEE
var ieeeTable8 *slicing8Table
var ieeeTable8Once sync.Once
// MakeTable returns a Table constructed from the specified polynomial.
// The contents of this Table must not be modified.
func MakeTable(poly uint32) *Table {
switch poly {
case IEEE:
return IEEETable
case Castagnoli:
castagnoliOnce.Do(castagnoliInit)
return castagnoliTable
}
return makeTable(poly)
}
// makeTable returns the Table constructed from the specified polynomial.
func makeTable(poly uint32) *Table {
t := new(Table)
for i := 0; i < 256; i++ {
crc := uint32(i)
for j := 0; j < 8; j++ {
if crc&1 == 1 {
crc = (crc >> 1) ^ poly
} else {
crc >>= 1
}
}
t[i] = crc
}
return t
}
// makeTable8 returns slicing8Table constructed from the specified polynomial.
func makeTable8(poly uint32) *slicing8Table {
t := new(slicing8Table)
t[0] = *makeTable(poly)
for i := 0; i < 256; i++ {
crc := t[0][i]
for j := 1; j < 8; j++ {
crc = t[0][crc&0xFF] ^ (crc >> 8)
t[j][i] = crc
}
}
return t
}
// digest represents the partial evaluation of a checksum.
type digest struct {
crc uint32
tab *Table
}
// New creates a new hash.Hash32 computing the CRC-32 checksum
// using the polynomial represented by the Table.
// Its Sum method will lay the value out in big-endian byte order.
func New(tab *Table) hash.Hash32 { return &digest{0, tab} }
// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
// using the IEEE polynomial.
// Its Sum method will lay the value out in big-endian byte order.
func NewIEEE() hash.Hash32 { return New(IEEETable) }
func (d *digest) Size() int { return Size }
func (d *digest) BlockSize() int { return 1 }
func (d *digest) Reset() { d.crc = 0 }
func update(crc uint32, tab *Table, p []byte) uint32 {
crc = ^crc
for _, v := range p {
crc = tab[byte(crc)^v] ^ (crc >> 8)
}
return ^crc
}
// updateSlicingBy8 updates CRC using Slicing-by-8
func updateSlicingBy8(crc uint32, tab *slicing8Table, p []byte) uint32 {
crc = ^crc
for len(p) > 8 {
crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
p = p[8:]
}
crc = ^crc
if len(p) == 0 {
return crc
}
return update(crc, &tab[0], p)
}
// Update returns the result of adding the bytes in p to the crc.
func Update(crc uint32, tab *Table, p []byte) uint32 {
if tab == castagnoliTable {
return updateCastagnoli(crc, p)
}
if tab == IEEETable {
return updateIEEE(crc, p)
}
return update(crc, tab, p)
}
func (d *digest) Write(p []byte) (n int, err error) {
d.crc = Update(d.crc, d.tab, p)
return len(p), nil
}
func (d *digest) Sum32() uint32 { return d.crc }
func (d *digest) Sum(in []byte) []byte {
s := d.Sum32()
return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
}
// Checksum returns the CRC-32 checksum of data
// using the polynomial represented by the Table.
func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
// ChecksumIEEE returns the CRC-32 checksum of data
// using the IEEE polynomial.
func ChecksumIEEE(data []byte) uint32 { return updateIEEE(0, data) }

62
weed/vendor/github.com/klauspost/crc32/crc32_amd64.go

@ -0,0 +1,62 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine,!gccgo
package crc32
// This file contains the code to call the SSE 4.2 version of the Castagnoli
// and IEEE CRC.
// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use
// CPUID to test for SSE 4.1, 4.2 and CLMUL support.
func haveSSE41() bool
func haveSSE42() bool
func haveCLMUL() bool
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
// instruction.
//go:noescape
func castagnoliSSE42(crc uint32, p []byte) uint32
// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ
// instruction as well as SSE 4.1.
//go:noescape
func ieeeCLMUL(crc uint32, p []byte) uint32
var sse42 = haveSSE42()
var useFastIEEE = haveCLMUL() && haveSSE41()
func updateCastagnoli(crc uint32, p []byte) uint32 {
if sse42 {
return castagnoliSSE42(crc, p)
}
// only use slicing-by-8 when input is >= 16 Bytes
if len(p) >= 16 {
return updateSlicingBy8(crc, castagnoliTable8, p)
}
return update(crc, castagnoliTable, p)
}
func updateIEEE(crc uint32, p []byte) uint32 {
if useFastIEEE && len(p) >= 64 {
left := len(p) & 15
do := len(p) - left
crc = ^ieeeCLMUL(^crc, p[:do])
if left > 0 {
crc = update(crc, IEEETable, p[do:])
}
return crc
}
// only use slicing-by-8 when input is >= 16 Bytes
if len(p) >= 16 {
ieeeTable8Once.Do(func() {
ieeeTable8 = makeTable8(IEEE)
})
return updateSlicingBy8(crc, ieeeTable8, p)
}
return update(crc, IEEETable, p)
}

237
weed/vendor/github.com/klauspost/crc32/crc32_amd64.s

@ -0,0 +1,237 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build gc
#define NOSPLIT 4
#define RODATA 8
// func castagnoliSSE42(crc uint32, p []byte) uint32
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
MOVL crc+0(FP), AX // CRC value
MOVQ p+8(FP), SI // data pointer
MOVQ p_len+16(FP), CX // len(p)
NOTL AX
// If there's less than 8 bytes to process, we do it byte-by-byte.
CMPQ CX, $8
JL cleanup
// Process individual bytes until the input is 8-byte aligned.
startup:
MOVQ SI, BX
ANDQ $7, BX
JZ aligned
CRC32B (SI), AX
DECQ CX
INCQ SI
JMP startup
aligned:
// The input is now 8-byte aligned and we can process 8-byte chunks.
CMPQ CX, $8
JL cleanup
CRC32Q (SI), AX
ADDQ $8, SI
SUBQ $8, CX
JMP aligned
cleanup:
// We may have some bytes left over that we process one at a time.
CMPQ CX, $0
JE done
CRC32B (SI), AX
INCQ SI
DECQ CX
JMP cleanup
done:
NOTL AX
MOVL AX, ret+32(FP)
RET
// func haveSSE42() bool
TEXT ·haveSSE42(SB), NOSPLIT, $0
XORQ AX, AX
INCL AX
CPUID
SHRQ $20, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
// func haveCLMUL() bool
TEXT ·haveCLMUL(SB), NOSPLIT, $0
XORQ AX, AX
INCL AX
CPUID
SHRQ $1, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
// func haveSSE41() bool
TEXT ·haveSSE41(SB), NOSPLIT, $0
XORQ AX, AX
INCL AX
CPUID
SHRQ $19, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
// CRC32 polynomial data
//
// These constants are lifted from the
// Linux kernel, since they avoid the costly
// PSHUFB 16 byte reversal proposed in the
// original Intel paper.
DATA r2r1kp<>+0(SB)/8, $0x154442bd4
DATA r2r1kp<>+8(SB)/8, $0x1c6e41596
DATA r4r3kp<>+0(SB)/8, $0x1751997d0
DATA r4r3kp<>+8(SB)/8, $0x0ccaa009e
DATA rupolykp<>+0(SB)/8, $0x1db710641
DATA rupolykp<>+8(SB)/8, $0x1f7011641
DATA r5kp<>+0(SB)/8, $0x163cd6124
GLOBL r2r1kp<>(SB), RODATA, $16
GLOBL r4r3kp<>(SB), RODATA, $16
GLOBL rupolykp<>(SB), RODATA, $16
GLOBL r5kp<>(SB), RODATA, $8
// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
// len(p) must be at least 64, and must be a multiple of 16.
// func ieeeCLMUL(crc uint32, p []byte) uint32
TEXT ·ieeeCLMUL(SB), NOSPLIT, $0
MOVL crc+0(FP), X0 // Initial CRC value
MOVQ p+8(FP), SI // data pointer
MOVQ p_len+16(FP), CX // len(p)
MOVOU (SI), X1
MOVOU 16(SI), X2
MOVOU 32(SI), X3
MOVOU 48(SI), X4
PXOR X0, X1
ADDQ $64, SI // buf+=64
SUBQ $64, CX // len-=64
CMPQ CX, $64 // Less than 64 bytes left
JB remain64
MOVOA r2r1kp<>+0(SB), X0
loopback64:
MOVOA X1, X5
MOVOA X2, X6
MOVOA X3, X7
MOVOA X4, X8
PCLMULQDQ $0, X0, X1
PCLMULQDQ $0, X0, X2
PCLMULQDQ $0, X0, X3
PCLMULQDQ $0, X0, X4
// Load next early
MOVOU (SI), X11
MOVOU 16(SI), X12
MOVOU 32(SI), X13
MOVOU 48(SI), X14
PCLMULQDQ $0x11, X0, X5
PCLMULQDQ $0x11, X0, X6
PCLMULQDQ $0x11, X0, X7
PCLMULQDQ $0x11, X0, X8
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
PXOR X8, X4
PXOR X11, X1
PXOR X12, X2
PXOR X13, X3
PXOR X14, X4
ADDQ $0x40, DI
ADDQ $64, SI // buf+=64
SUBQ $64, CX // len-=64
CMPQ CX, $64 // Less than 64 bytes left?
JGE loopback64
// Fold result into a single register (X1)
remain64:
MOVOA r4r3kp<>+0(SB), X0
MOVOA X1, X5
PCLMULQDQ $0, X0, X1
PCLMULQDQ $0x11, X0, X5
PXOR X5, X1
PXOR X2, X1
MOVOA X1, X5
PCLMULQDQ $0, X0, X1
PCLMULQDQ $0x11, X0, X5
PXOR X5, X1
PXOR X3, X1
MOVOA X1, X5
PCLMULQDQ $0, X0, X1
PCLMULQDQ $0x11, X0, X5
PXOR X5, X1
PXOR X4, X1
// More than 16 bytes left?
CMPQ CX, $16
JB finish
// Encode 16 bytes
remain16:
MOVOU (SI), X10
MOVOA X1, X5
PCLMULQDQ $0, X0, X1
PCLMULQDQ $0x11, X0, X5
PXOR X5, X1
PXOR X10, X1
SUBQ $16, CX
ADDQ $16, SI
CMPQ CX, $16
JGE remain16
finish:
// Fold final result into 32 bits and return it
PCMPEQB X3, X3
PCLMULQDQ $1, X1, X0
PSRLDQ $8, X1
PXOR X0, X1
MOVOA X1, X2
MOVQ r5kp<>+0(SB), X0
// Creates 32 bit mask. Note that we don't care about upper half.
PSRLQ $32, X3
PSRLDQ $4, X2
PAND X3, X1
PCLMULQDQ $0, X0, X1
PXOR X2, X1
MOVOA rupolykp<>+0(SB), X0
MOVOA X1, X2
PAND X3, X1
PCLMULQDQ $0x10, X0, X1
PAND X3, X1
PCLMULQDQ $0, X0, X1
PXOR X2, X1
// PEXTRD $1, X1, AX (SSE 4.1)
BYTE $0x66; BYTE $0x0f; BYTE $0x3a
BYTE $0x16; BYTE $0xc8; BYTE $0x01
MOVL AX, ret+32(FP)
RET

40
weed/vendor/github.com/klauspost/crc32/crc32_amd64p32.go

@ -0,0 +1,40 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine,!gccgo
package crc32
// This file contains the code to call the SSE 4.2 version of the Castagnoli
// CRC.
// haveSSE42 is defined in crc_amd64p32.s and uses CPUID to test for SSE 4.2
// support.
func haveSSE42() bool
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
// instruction.
//go:noescape
func castagnoliSSE42(crc uint32, p []byte) uint32
var sse42 = haveSSE42()
func updateCastagnoli(crc uint32, p []byte) uint32 {
if sse42 {
return castagnoliSSE42(crc, p)
}
return update(crc, castagnoliTable, p)
}
func updateIEEE(crc uint32, p []byte) uint32 {
// only use slicing-by-8 when input is >= 4KB
if len(p) >= 4096 {
ieeeTable8Once.Do(func() {
ieeeTable8 = makeTable8(IEEE)
})
return updateSlicingBy8(crc, ieeeTable8, p)
}
return update(crc, IEEETable, p)
}

67
weed/vendor/github.com/klauspost/crc32/crc32_amd64p32.s

@ -0,0 +1,67 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build gc
#define NOSPLIT 4
#define RODATA 8
// func castagnoliSSE42(crc uint32, p []byte) uint32
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
MOVL crc+0(FP), AX // CRC value
MOVL p+4(FP), SI // data pointer
MOVL p_len+8(FP), CX // len(p)
NOTL AX
// If there's less than 8 bytes to process, we do it byte-by-byte.
CMPQ CX, $8
JL cleanup
// Process individual bytes until the input is 8-byte aligned.
startup:
MOVQ SI, BX
ANDQ $7, BX
JZ aligned
CRC32B (SI), AX
DECQ CX
INCQ SI
JMP startup
aligned:
// The input is now 8-byte aligned and we can process 8-byte chunks.
CMPQ CX, $8
JL cleanup
CRC32Q (SI), AX
ADDQ $8, SI
SUBQ $8, CX
JMP aligned
cleanup:
// We may have some bytes left over that we process one at a time.
CMPQ CX, $0
JE done
CRC32B (SI), AX
INCQ SI
DECQ CX
JMP cleanup
done:
NOTL AX
MOVL AX, ret+16(FP)
RET
// func haveSSE42() bool
TEXT ·haveSSE42(SB), NOSPLIT, $0
XORQ AX, AX
INCL AX
CPUID
SHRQ $20, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET

29
weed/vendor/github.com/klauspost/crc32/crc32_generic.go

@ -0,0 +1,29 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!amd64p32 appengine gccgo
package crc32
// This file contains the generic version of updateCastagnoli which does
// slicing-by-8, or uses the fallback for very small sizes.
func updateCastagnoli(crc uint32, p []byte) uint32 {
// only use slicing-by-8 when input is >= 16 Bytes
if len(p) >= 16 {
return updateSlicingBy8(crc, castagnoliTable8, p)
}
return update(crc, castagnoliTable, p)
}
func updateIEEE(crc uint32, p []byte) uint32 {
// only use slicing-by-8 when input is >= 16 Bytes
if len(p) >= 16 {
ieeeTable8Once.Do(func() {
ieeeTable8 = makeTable8(IEEE)
})
return updateSlicingBy8(crc, ieeeTable8, p)
}
return update(crc, IEEETable, p)
}
Loading…
Cancel
Save