Browse Source

Switched to SHA1 for the file hashing. This costs milliseconds of time but ensures we won't get collisions. It is also better distributed than MD5.

master
Drew Short 10 years ago
parent
commit
8924b118b9
  1. 2
      engine/src/main/resources/hibernate/ImageHash.hbm.xml
  2. 12
      engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala
  3. 13
      engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala
  4. 4
      engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala
  5. 4
      engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala
  6. 2
      engine/src/test/resources/hibernate/ImageHash.hbm.xml
  7. 2
      engine/src/test/resources/logback-minimum-config.xml
  8. 58
      engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala

2
engine/src/main/resources/hibernate/ImageHash.hbm.xml

@ -13,6 +13,6 @@
<property name="ahash" column="ahash" type="long"/>
<property name="dhash" column="dhash" type="long"/>
<property name="phash" column="phash" type="long"/>
<property name="md5" column="md5" type="string"/>
<property name="fileHash" column="fileHash" type="string"/>
</class>
</hibernate-mapping>

12
engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala

@ -6,7 +6,7 @@ import grizzled.slf4j.Logging
@Entity
@Table(name = "ImageHash")
class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: String) extends Serializable with Logging {
class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var fileHash: String) extends Serializable with Logging {
@Id
@GeneratedValue(strategy = GenerationType.AUTO)
@ -38,14 +38,14 @@ class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: S
phash = hash
}
def getMd5: String = md5
def getFileHash: String = fileHash
def setMd5(hash: String) = {
md5 = hash
def setFileHash(hash: String) = {
fileHash = hash
}
def cloneHashes: ImageHashDTO = {
new ImageHashDTO(ahash, dhash, phash, md5)
new ImageHashDTO(ahash, dhash, phash, fileHash)
}
override def hashCode(): Int = {
@ -57,6 +57,6 @@ class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: S
}
override def toString: String = {
s"MD5: $md5 ahash: $ahash dhash: $dhash phash: $phash"
s"fileHash: $fileHash ahash: $ahash dhash: $dhash phash: $phash"
}
}

13
engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala

@ -29,7 +29,7 @@ object HashService extends Logging {
var ahash: Long = 0L
var dhash: Long = 0L
var phash: Long = 0L
val md5: String = getMD5(imagePath)
val sha1: String = getSHA1(imagePath)
//Get Image Data
val grayImage = ImageService.convertToGray(image)
@ -44,7 +44,7 @@ object HashService extends Logging {
phash = getPhash(grayImage, alreadyGray = true)
}
val hashes = new ImageHashDTO(ahash, dhash, phash, md5)
val hashes = new ImageHashDTO(ahash, dhash, phash, sha1)
debug(s"Generated hashes: $hashes")
hashes
@ -91,8 +91,13 @@ object HashService extends Logging {
def getMD5(filePath: String): String = {
managed(new FileInputStream(filePath)) acquireAndGet {
input =>
DigestUtils.md5Hex(input)
input => DigestUtils.md5Hex(input)
}
}
/**
 * Computes the SHA-1 digest of a file's contents as a hexadecimal string.
 *
 * The file is opened as a `FileInputStream` wrapped by `managed`, and the
 * stream is handed to `DigestUtils.sha1Hex`.
 *
 * @param filePath path of the file to hash
 * @return the hex string produced by `DigestUtils.sha1Hex` for the file's stream
 */
def getSHA1(filePath: String): String = {
  val fileStream = managed(new FileInputStream(filePath))
  fileStream.acquireAndGet(stream => DigestUtils.sha1Hex(stream))
}

4
engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala

@ -28,8 +28,8 @@ object ImageService extends Logging {
debug(s"Processing image: ${file.getAbsolutePath}")
val bufferedImage = ImageIO.read(file)
val hashes = HashService.getImageHashes(bufferedImage, file.getAbsolutePath)
var thumbnailPath = lookupThumbnailPath(hashes.md5)
if (thumbnailPath == null) thumbnailPath = getThumbnail(bufferedImage, hashes.md5)
var thumbnailPath = lookupThumbnailPath(hashes.getFileHash)
if (thumbnailPath == null) thumbnailPath = getThumbnail(bufferedImage, hashes.getFileHash)
val imageSize = {
(bufferedImage.getWidth, bufferedImage.getHeight)
}

4
engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala

@ -8,7 +8,7 @@ trait Timing extends Logging {
val t0 = System.currentTimeMillis
val result = block // call-by-name
val t1 = System.currentTimeMillis
info("Elapsed time: " + (t1 - t0) + "ms")
debug("Elapsed time: " + (t1 - t0) + "ms")
result
}
@ -16,7 +16,7 @@ trait Timing extends Logging {
val t0 = System.currentTimeMillis
val result = block // call-by-name
val t1 = System.currentTimeMillis
info("Elapsed time: " + (t1 - t0) + "ms")
debug("Elapsed time: " + (t1 - t0) + "ms")
t1 - t0
}

2
engine/src/test/resources/hibernate/ImageHash.hbm.xml

@ -13,6 +13,6 @@
<property name="ahash" column="ahash" type="long"/>
<property name="dhash" column="dhash" type="long"/>
<property name="phash" column="phash" type="long"/>
<property name="md5" column="md5" type="string"/>
<property name="fileHash" column="fileHash" type="string"/>
</class>
</hibernate-mapping>

2
engine/src/test/resources/logback-minimum-config.xml

@ -51,7 +51,7 @@
<MaxFileSize>500KB</MaxFileSize>
</triggeringPolicy>
</appender>
<root level="DEBUG">
<root level="INFO">
<appender-ref ref="DL"/>
<appender-ref ref="IL"/>
<appender-ref ref="EL"/>

58
engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala

@ -335,6 +335,64 @@ class HashServiceTest extends BaseTest {
assert(hash == "b137131bd55896c747286e4d247b845e")
}
/**
 * Single SHA1 run used by the SHA1 benchmark: delegates to `HashService.getSHA1`.
 *
 * @param filePath path of the file to hash
 * @return the SHA1 hex string returned by `HashService.getSHA1`
 */
def sha1TestCase(filePath: String): String = HashService.getSHA1(filePath)
test("Benchmark SHA1") {
  // Times `benchmarkRuns` executions of the SHA1 test case for one image and
  // logs the mean of the collected timings under the given size label.
  def reportMean(header: String, label: String, imagePath: String): Unit = {
    info(header)
    val samples = (0 until benchmarkRuns).map { _ =>
      getTime {
        sha1TestCase(imagePath)
      }
    }
    val mean = getMean(samples.toArray[Long])
    info(s"The mean time of ${samples.size} tests for $label was: $mean ms")
  }

  info("Benchmarking SHA1")
  reportMean("SHA1 Large Image 3684x2736", "large", TestParams.LargeSampleImage1)
  reportMean("SHA1 Medium Image 1824x1368", "medium", TestParams.MediumSampleImage1)
  reportMean("SHA1 Small Image 912x684", "small", TestParams.SmallSampleImage1)

  // The benchmark only reports timings; there is no value to verify.
  assert(true)
}
test("Calculate SHA1 Large Sample Image 1") {
  // Expected SHA1 hex digest for the large sample image fixture.
  val expected = "4beb6f2d852b75a313863916a1803ebad13a3196"
  debug("Starting 'Calculate SHA1 Large Sample Image 1' test")
  val actual = HashService.getSHA1(TestParams.LargeSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
test("Calculate SHA1 Medium Sample Image 1") {
  // Expected SHA1 hex digest for the medium sample image fixture.
  val expected = "edc718ce8e3556a39592ffdc214d0f636529be9f"
  debug("Starting 'Calculate SHA1 Medium Sample Image 1' test")
  val actual = HashService.getSHA1(TestParams.MediumSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
test("Calculate SHA1 Small Sample Image 1") {
  // Expected SHA1 hex digest for the small sample image fixture.
  val expected = "1a91d2b5327f0aad258419f76b87d4c0bc343443"
  debug("Starting 'Calculate SHA1 Small Sample Image 1' test")
  val actual = HashService.getSHA1(TestParams.SmallSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
def imageHashTestWithCacheCase(filePath: String): ImageHashDTO = {
val cache = AppConfig.cacheManager.getCache("images")
var result: ImageHashDTO = null

Loading…
Cancel
Save