From 8924b118b9e9393a02a76e78fd8eca46c5b56db9 Mon Sep 17 00:00:00 2001 From: Drew Short Date: Mon, 8 Dec 2014 22:19:38 -0600 Subject: [PATCH] Switched to SHA1 for the file hashing. This costs ms of time but ensures we won't get collisions. Also is better distributed than md5 --- .../resources/hibernate/ImageHash.hbm.xml | 2 +- .../imagetools/engine/dto/ImageHashDTO.scala | 12 ++-- .../imagetools/engine/hash/HashService.scala | 13 +++-- .../engine/image/ImageService.scala | 4 +- .../sothr/imagetools/engine/util/Timing.scala | 4 +- .../resources/hibernate/ImageHash.hbm.xml | 2 +- .../test/resources/logback-minimum-config.xml | 2 +- .../engine/hash/HashServiceTest.scala | 58 +++++++++++++++++++ 8 files changed, 80 insertions(+), 17 deletions(-) diff --git a/engine/src/main/resources/hibernate/ImageHash.hbm.xml b/engine/src/main/resources/hibernate/ImageHash.hbm.xml index 880cafe..af4e6a2 100644 --- a/engine/src/main/resources/hibernate/ImageHash.hbm.xml +++ b/engine/src/main/resources/hibernate/ImageHash.hbm.xml @@ -13,6 +13,6 @@ - + \ No newline at end of file diff --git a/engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala b/engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala index 7314fa0..f6909f9 100644 --- a/engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala +++ b/engine/src/main/scala/com/sothr/imagetools/engine/dto/ImageHashDTO.scala @@ -6,7 +6,7 @@ import grizzled.slf4j.Logging @Entity @Table(name = "ImageHash") -class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: String) extends Serializable with Logging { +class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var fileHash: String) extends Serializable with Logging { @Id @GeneratedValue(strategy = GenerationType.AUTO) @@ -38,14 +38,14 @@ class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: S phash = hash } - def getMd5: String = md5 + def getFileHash: String = fileHash - def
setMd5(hash: String) = { - md5 = hash + def setFileHash(hash: String) = { + fileHash = hash } def cloneHashes: ImageHashDTO = { - new ImageHashDTO(ahash, dhash, phash, md5) + new ImageHashDTO(ahash, dhash, phash, fileHash) } override def hashCode(): Int = { @@ -57,6 +57,6 @@ class ImageHashDTO(var ahash: Long, var dhash: Long, var phash: Long, var md5: S } override def toString: String = { - s"MD5: $md5 ahash: $ahash dhash: $dhash phash: $phash" + s"fileHash: $fileHash ahash: $ahash dhash: $dhash phash: $phash" } } diff --git a/engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala b/engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala index baf6397..1aa1962 100644 --- a/engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala +++ b/engine/src/main/scala/com/sothr/imagetools/engine/hash/HashService.scala @@ -29,7 +29,7 @@ object HashService extends Logging { var ahash: Long = 0L var dhash: Long = 0L var phash: Long = 0L - val md5: String = getMD5(imagePath) + val sha1: String = getSHA1(imagePath) //Get Image Data val grayImage = ImageService.convertToGray(image) @@ -44,7 +44,7 @@ object HashService extends Logging { phash = getPhash(grayImage, alreadyGray = true) } - val hashes = new ImageHashDTO(ahash, dhash, phash, md5) + val hashes = new ImageHashDTO(ahash, dhash, phash, sha1) debug(s"Generated hashes: $hashes") hashes @@ -91,8 +91,13 @@ object HashService extends Logging { def getMD5(filePath: String): String = { managed(new FileInputStream(filePath)) acquireAndGet { - input => - DigestUtils.md5Hex(input) + input => DigestUtils.md5Hex(input) + } + } + + def getSHA1(filePath: String): String = { + managed(new FileInputStream(filePath)) acquireAndGet { + input => DigestUtils.sha1Hex(input) } } diff --git a/engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala b/engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala index cb4db2b..a3c18ab 100644 --- 
a/engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala +++ b/engine/src/main/scala/com/sothr/imagetools/engine/image/ImageService.scala @@ -28,8 +28,8 @@ object ImageService extends Logging { debug(s"Processing image: ${file.getAbsolutePath}") val bufferedImage = ImageIO.read(file) val hashes = HashService.getImageHashes(bufferedImage, file.getAbsolutePath) - var thumbnailPath = lookupThumbnailPath(hashes.md5) - if (thumbnailPath == null) thumbnailPath = getThumbnail(bufferedImage, hashes.md5) + var thumbnailPath = lookupThumbnailPath(hashes.getFileHash) + if (thumbnailPath == null) thumbnailPath = getThumbnail(bufferedImage, hashes.getFileHash) val imageSize = { (bufferedImage.getWidth, bufferedImage.getHeight) } diff --git a/engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala b/engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala index d57ff31..041a8d0 100644 --- a/engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala +++ b/engine/src/main/scala/com/sothr/imagetools/engine/util/Timing.scala @@ -8,7 +8,7 @@ trait Timing extends Logging { val t0 = System.currentTimeMillis val result = block // call-by-name val t1 = System.currentTimeMillis - info("Elapsed time: " + (t1 - t0) + "ms") + debug("Elapsed time: " + (t1 - t0) + "ms") result } @@ -16,7 +16,7 @@ trait Timing extends Logging { val t0 = System.currentTimeMillis val result = block // call-by-name val t1 = System.currentTimeMillis - info("Elapsed time: " + (t1 - t0) + "ms") + debug("Elapsed time: " + (t1 - t0) + "ms") t1 - t0 } diff --git a/engine/src/test/resources/hibernate/ImageHash.hbm.xml b/engine/src/test/resources/hibernate/ImageHash.hbm.xml index 880cafe..af4e6a2 100644 --- a/engine/src/test/resources/hibernate/ImageHash.hbm.xml +++ b/engine/src/test/resources/hibernate/ImageHash.hbm.xml @@ -13,6 +13,6 @@ - + \ No newline at end of file diff --git a/engine/src/test/resources/logback-minimum-config.xml 
b/engine/src/test/resources/logback-minimum-config.xml index ace343d..ca251cc 100644 --- a/engine/src/test/resources/logback-minimum-config.xml +++ b/engine/src/test/resources/logback-minimum-config.xml @@ -51,7 +51,7 @@ 500KB - + diff --git a/engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala b/engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala index be5da28..9a19cb3 100644 --- a/engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala +++ b/engine/src/test/scala/com/sothr/imagetools/engine/hash/HashServiceTest.scala @@ -335,6 +335,64 @@ class HashServiceTest extends BaseTest { assert(hash == "b137131bd55896c747286e4d247b845e") } + def sha1TestCase(filePath: String): String = { + HashService.getSHA1(filePath) + } + + test("Benchmark SHA1") { + info("Benchmarking SHA1") + info("SHA1 Large Image 3684x2736") + val time = new mutable.MutableList[Long]() + for (runNum <- 0 until benchmarkRuns) { + time += getTime { + sha1TestCase(TestParams.LargeSampleImage1) + } + } + val largeMean = getMean(time.toArray[Long]) + info(s"The mean time of ${time.size} tests for large was: $largeMean ms") + time.clear() + info("SHA1 Medium Image 1824x1368") + for (runNum <- 0 until benchmarkRuns) { + time += getTime { + sha1TestCase(TestParams.MediumSampleImage1) + } + } + val mediumMean = getMean(time.toArray[Long]) + info(s"The mean time of ${time.size} tests for medium was: $mediumMean ms") + time.clear() + info("SHA1 Small Image 912x684") + for (runNum <- 0 until benchmarkRuns) { + time += getTime { + sha1TestCase(TestParams.SmallSampleImage1) + } + } + val smallMean = getMean(time.toArray[Long]) + info(s"The mean time of ${time.size} tests for small was: $smallMean ms") + time.clear() + assert(true) + } + + test("Calculate SHA1 Large Sample Image 1") { + debug("Starting 'Calculate SHA1 Large Sample Image 1' test") + val hash = HashService.getSHA1(TestParams.LargeSampleImage1) + debug(s"Testing that $hash = 
4beb6f2d852b75a313863916a1803ebad13a3196") + assert(hash == "4beb6f2d852b75a313863916a1803ebad13a3196") + } + + test("Calculate SHA1 Medium Sample Image 1") { + debug("Starting 'Calculate SHA1 Medium Sample Image 1' test") + val hash = HashService.getSHA1(TestParams.MediumSampleImage1) + debug(s"Testing that $hash = edc718ce8e3556a39592ffdc214d0f636529be9f") + assert(hash == "edc718ce8e3556a39592ffdc214d0f636529be9f") + } + + test("Calculate SHA1 Small Sample Image 1") { + debug("Starting 'Calculate SHA1 Small Sample Image 1' test") + val hash = HashService.getSHA1(TestParams.SmallSampleImage1) + debug(s"Testing that $hash = 1a91d2b5327f0aad258419f76b87d4c0bc343443") + assert(hash == "1a91d2b5327f0aad258419f76b87d4c0bc343443") + } + def imageHashTestWithCacheCase(filePath: String): ImageHashDTO = { val cache = AppConfig.cacheManager.getCache("images") var result: ImageHashDTO = null