Browse Source

Added some proper hashing. Added a few more tests. Added a little bit more benchmarking

master
Drew Short 11 years ago
parent
commit
cf2e951c43
  1. 14
      src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala
  2. 91
      src/main/scala/com/sothr/imagetools/hash/HashService.scala
  3. 6
      src/main/scala/com/sothr/imagetools/hash/PHash.scala
  4. 6
      src/main/scala/com/sothr/imagetools/util/PropertiesEnum.scala
  5. 64
      src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala

14
src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala

@ -1,13 +1,23 @@
package com.sothr.imagetools.dto package com.sothr.imagetools.dto
class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) {
import grizzled.slf4j.Logging
class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) extends Logging {
def getAhash():Long = this.ahash def getAhash():Long = this.ahash
def getDhash():Long = this.dhash def getDhash():Long = this.dhash
def getPhash():Long = this.phash def getPhash():Long = this.phash
/**
 * Combines the three perceptual hashes into one Int hash code.
 *
 * Each 64-bit hash is reduced to 32 bits by XOR-ing it with its own upper half, then mixed
 * into a running result (seeded with 365) using a per-field multiplier. Int overflow wraps,
 * which is intended for a hash code.
 *
 * NOTE(review): no matching equals override is visible in this view of the class - confirm
 * whether one exists before using this type as a map/set key.
 */
override def hashCode():Int = {
  // (multiplier, hash) pairs, mixed in ahash -> dhash -> phash order
  val weightedFields = Seq((41, this.ahash), (37, this.dhash), (2, this.phash))
  weightedFields.foldLeft(365) { case (accumulated, (multiplier, hash)) =>
    multiplier * accumulated + (hash ^ (hash >>> 32)).toInt
  }
}
override def toString():String = { override def toString():String = {
return s"""ahash: $ahash
s"""ahash: $ahash
dhash: $dhash dhash: $dhash
phash: $phash""".stripMargin phash: $phash""".stripMargin
} }

91
src/main/scala/com/sothr/imagetools/hash/HashService.scala

@ -24,16 +24,16 @@ object HashService extends Logging {
var phash:Long = 0L var phash:Long = 0L
//Get Image Data //Get Image Data
val image = ImageIO.read(new File(imagePath))
val grayImage = ImageService.convertToGray(ImageIO.read(new File(imagePath)))
if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") { if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") {
ahash = getAhash(image)
ahash = getAhash(grayImage, true)
} }
if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") { if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") {
dhash = getDhash(image)
dhash = getDhash(grayImage, true)
} }
if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") { if (PropertiesService.get(PropertiesEnum.UseAhash.toString) == "true") {
phash = getPhash(image)
phash = getPhash(grayImage, true)
} }
val hashes = new ImageHashDTO(ahash, dhash, phash) val hashes = new ImageHashDTO(ahash, dhash, phash)
@ -42,49 +42,114 @@ object HashService extends Logging {
return hashes return hashes
} }
def getAhash(image:BufferedImage):Long = {
def getAhash(image:BufferedImage, alreadyGray:Boolean = false):Long = {
debug("Started generating an AHash") debug("Started generating an AHash")
val grayImage = ImageService.convertToGray(image)
var grayImage:BufferedImage = null
if (alreadyGray) {
grayImage = image
} else {
grayImage = ImageService.convertToGray(image)
}
val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.AhashPrecision.toString).toInt, true) val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.AhashPrecision.toString).toInt, true)
val imageData = ImageService.getImageData(resizedImage) val imageData = ImageService.getImageData(resizedImage)
AHash.getHash(imageData) AHash.getHash(imageData)
} }
def getDhash(image:BufferedImage):Long = {
def getDhash(image:BufferedImage, alreadyGray:Boolean = false):Long = {
debug("Started generating an DHash") debug("Started generating an DHash")
val grayImage = ImageService.convertToGray(image)
var grayImage:BufferedImage = null
if (alreadyGray) {
grayImage = image
} else {
grayImage = ImageService.convertToGray(image)
}
val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.DhashPrecision.toString).toInt, true) val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.DhashPrecision.toString).toInt, true)
val imageData = ImageService.getImageData(resizedImage) val imageData = ImageService.getImageData(resizedImage)
DHash.getHash(imageData) DHash.getHash(imageData)
} }
def getPhash(image:BufferedImage):Long = {
def getPhash(image:BufferedImage, alreadyGray:Boolean = false):Long = {
debug("Started generating an PHash") debug("Started generating an PHash")
val grayImage = ImageService.convertToGray(image)
var grayImage:BufferedImage = null
if (alreadyGray) {
grayImage = image
} else {
grayImage = ImageService.convertToGray(image)
}
val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.PhashPrecision.toString).toInt, true) val resizedImage = ImageService.resize(grayImage, PropertiesService.get(PropertiesEnum.PhashPrecision.toString).toInt, true)
val imageData = ImageService.getImageData(resizedImage) val imageData = ImageService.getImageData(resizedImage)
PHash.getHash(imageData) PHash.getHash(imageData)
} }
def areAhashSimilar(ahash1:Long, ahash2:Long):Boolean = { def areAhashSimilar(ahash1:Long, ahash2:Long):Boolean = {
val tolerence = PropertiesService.get(PropertiesEnum.AhashTolerence.toString).toInt
val tolerence = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
val distance = Hamming.getDistance(ahash1, ahash2) val distance = Hamming.getDistance(ahash1, ahash2)
debug(s"hash1: $ahash1 hash2: $ahash2 tolerence: $tolerence hamming distance: $distance") debug(s"hash1: $ahash1 hash2: $ahash2 tolerence: $tolerence hamming distance: $distance")
if (distance <= tolerence) true else false if (distance <= tolerence) true else false
} }
def areDhashSimilar(dhash1:Long, dhash2:Long):Boolean = { def areDhashSimilar(dhash1:Long, dhash2:Long):Boolean = {
val tolerence = PropertiesService.get(PropertiesEnum.DhashTolerence.toString).toInt
val tolerence = PropertiesService.get(PropertiesEnum.DhashTolerance.toString).toInt
val distance = Hamming.getDistance(dhash1, dhash2) val distance = Hamming.getDistance(dhash1, dhash2)
debug(s"hash1: $dhash1 hash2: $dhash2 tolerence: $tolerence hamming distance: $distance") debug(s"hash1: $dhash1 hash2: $dhash2 tolerence: $tolerence hamming distance: $distance")
if (distance <= tolerence) true else false if (distance <= tolerence) true else false
} }
def arePhashSimilar(phash1:Long, phash2:Long):Boolean = { def arePhashSimilar(phash1:Long, phash2:Long):Boolean = {
val tolerence = PropertiesService.get(PropertiesEnum.PhashTolerence.toString).toInt
val tolerence = PropertiesService.get(PropertiesEnum.PhashTolerance.toString).toInt
val distance = Hamming.getDistance(phash1, phash2) val distance = Hamming.getDistance(phash1, phash2)
debug(s"hash1: $phash1 hash2: $phash2 tolerence: $tolerence hamming distance: $distance") debug(s"hash1: $phash1 hash2: $phash2 tolerence: $tolerence hamming distance: $distance")
if (distance <= tolerence) true else false if (distance <= tolerence) true else false
} }
/**
 * Decides whether two images are similar by combining every enabled hash method into a
 * single weighted comparison.
 *
 * For each method whose "use" property is true, the hamming distance between the two hashes
 * and the configured tolerance are both multiplied by that method's weight and accumulated.
 * The images are considered similar when the mean weighted distance does not exceed the mean
 * weighted tolerance.
 *
 * @param imageHash1 hashes of the first image
 * @param imageHash2 hashes of the second image
 * @return true when the weighted hamming mean is within the weighted tolerance mean;
 *         false otherwise, including when no hash method is enabled
 */
def areImageHashesSimilar(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Boolean = {
  //ahash
  val aHashTolerance = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
  val aHashWeight = PropertiesService.get(PropertiesEnum.AhashWeight.toString).toFloat
  val useAhash = PropertiesService.get(PropertiesEnum.UseAhash.toString).toBoolean
  //dhash
  val dHashTolerance = PropertiesService.get(PropertiesEnum.DhashTolerance.toString).toInt
  val dHashWeight = PropertiesService.get(PropertiesEnum.DhashWeight.toString).toFloat
  val useDhash = PropertiesService.get(PropertiesEnum.UseDhash.toString).toBoolean
  //phash
  val pHashTolerance = PropertiesService.get(PropertiesEnum.PhashTolerance.toString).toInt
  val pHashWeight = PropertiesService.get(PropertiesEnum.PhashWeight.toString).toFloat
  val usePhash = PropertiesService.get(PropertiesEnum.UsePhash.toString).toBoolean
  //calculate weighted values
  var weightedHammingTotal:Float = 0
  var weightedToleranceTotal:Float = 0
  var methodsTotal = 0
  if (useAhash) {
    val hamming = Hamming.getDistance(imageHash1.getAhash(), imageHash2.getAhash())
    weightedHammingTotal += hamming * aHashWeight
    weightedToleranceTotal += aHashTolerance * aHashWeight
    // hash2 must come from imageHash2, otherwise the log shows the same value twice
    debug(s"hash1: ${imageHash1.getAhash()} hash2: ${imageHash2.getAhash()} tolerence: $aHashTolerance hamming distance: $hamming weight: $aHashWeight")
    methodsTotal += 1
  }
  if (useDhash) {
    val hamming = Hamming.getDistance(imageHash1.getDhash(), imageHash2.getDhash())
    weightedHammingTotal += hamming * dHashWeight
    weightedToleranceTotal += dHashTolerance * dHashWeight
    debug(s"hash1: ${imageHash1.getDhash()} hash2: ${imageHash2.getDhash()} tolerence: $dHashTolerance hamming distance: $hamming weight: $dHashWeight")
    methodsTotal += 1
  }
  if (usePhash) {
    val hamming = Hamming.getDistance(imageHash1.getPhash(), imageHash2.getPhash())
    weightedHammingTotal += hamming * pHashWeight
    weightedToleranceTotal += pHashTolerance * pHashWeight
    debug(s"hash1: ${imageHash1.getPhash()} hash2: ${imageHash2.getPhash()} tolerence: $pHashTolerance hamming distance: $hamming weight: $pHashWeight")
    methodsTotal += 1
  }
  if (methodsTotal == 0) {
    // Without this guard both means are 0f / 0 = NaN and the comparison is false only by
    // accident of NaN semantics; make the "nothing to compare" outcome explicit instead.
    warn("No hash methods are enabled, images cannot be compared and are treated as not similar")
    false
  } else {
    val weightedHammingMean = weightedHammingTotal / methodsTotal
    val weightedToleranceMean = weightedToleranceTotal / methodsTotal
    debug(s"Weighted Values Are: Hamming: $weightedHammingMean Tolerance: $weightedToleranceMean")
    weightedHammingMean <= weightedToleranceMean
  }
}
} }

6
src/main/scala/com/sothr/imagetools/hash/PHash.scala

@ -11,6 +11,7 @@ object PHash extends PerceptualHasher with Logging {
//convert the imageData into a FloatArray //convert the imageData into a FloatArray
val width = imageData.length val width = imageData.length
val height = imageData(0).length val height = imageData(0).length
debug(s"Starting with image of ${height}x${width} for PHash")
val imageDataFloat:Array[Array[Float]] = Array.ofDim[Float](height, width) val imageDataFloat:Array[Array[Float]] = Array.ofDim[Float](height, width)
for (row <- 0 until height) { for (row <- 0 until height) {
@ -18,10 +19,14 @@ object PHash extends PerceptualHasher with Logging {
imageDataFloat(row)(col) = imageData(row)(col).toFloat imageDataFloat(row)(col) = imageData(row)(col).toFloat
} }
} }
debug("Copied image data to float array for transform")
//debug(s"\n${imageDataFloat.deep.mkString("\n")}")
//perform transform on the data //perform transform on the data
val dct:FloatDCT_2D = new FloatDCT_2D(height,width) val dct:FloatDCT_2D = new FloatDCT_2D(height,width)
dct.forward(imageDataFloat, true) dct.forward(imageDataFloat, true)
debug("Converted image data into DCT")
//debug(s"\n${imageDataFloat.deep.mkString("\n")}")
//extract the DCT data //extract the DCT data
val dctDataWidth:Int = width / 4 val dctDataWidth:Int = width / 4
@ -35,6 +40,7 @@ object PHash extends PerceptualHasher with Logging {
} }
} }
val mean = total / (dctDataHeight * dctDataWidth) val mean = total / (dctDataHeight * dctDataWidth)
debug(s"Calculated mean as $mean from ${total}/${dctDataHeight * dctDataWidth}")
//calculate the hash //calculate the hash
var hash = 0L var hash = 0L

6
src/main/scala/com/sothr/imagetools/util/PropertiesEnum.scala

@ -14,15 +14,15 @@ object PropertiesEnum extends Enumeration {
val UseAhash = Value("image.ahash.use") val UseAhash = Value("image.ahash.use")
val AhashWeight = Value("image.ahash.weight") val AhashWeight = Value("image.ahash.weight")
val AhashPrecision = Value("image.ahash.precision") val AhashPrecision = Value("image.ahash.precision")
val AhashTolerence = Value("image.ahash.tolerence")
val AhashTolerance = Value("image.ahash.tolerence")
val UseDhash = Value("image.dhash.use") val UseDhash = Value("image.dhash.use")
val DhashWeight = Value("image.dhash.weight") val DhashWeight = Value("image.dhash.weight")
val DhashPrecision = Value("image.dhash.precision") val DhashPrecision = Value("image.dhash.precision")
val DhashTolerence = Value("image.dhash.tolerence")
val DhashTolerance = Value("image.dhash.tolerence")
val UsePhash = Value("image.phash.use") val UsePhash = Value("image.phash.use")
val PhashWeight = Value("image.phash.weight") val PhashWeight = Value("image.phash.weight")
val PhashPrecision = Value("image.phash.precision") val PhashPrecision = Value("image.phash.precision")
val PhashTolerence = Value("image.phash.tolerence")
val PhashTolerance = Value("image.phash.tolerence")
//Default Thumbnail Settings //Default Thumbnail Settings
val ThumbnailDirectory = Value("thumbnail.directory") val ThumbnailDirectory = Value("thumbnail.directory")
val ThumbnailSize = Value("thumbnail.size") val ThumbnailSize = Value("thumbnail.size")

64
src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala

@ -3,6 +3,7 @@ package com.sothr.imagetools.hash
import com.sothr.imagetools.{BaseTest, TestParams} import com.sothr.imagetools.{BaseTest, TestParams}
import javax.imageio.ImageIO import javax.imageio.ImageIO
import java.io.File import java.io.File
import com.sothr.imagetools.dto.ImageHashDTO
/** /**
* Created by dev on 1/23/14. * Created by dev on 1/23/14.
@ -255,4 +256,67 @@ class HashServiceTest extends BaseTest {
assert(HashService.arePhashSimilar(mediumHash,smallHash)) assert(HashService.arePhashSimilar(mediumHash,smallHash))
} }
// Shared helper: computes the full set of hashes for the image at filePath via HashService.
def imageHashTestCase(filePath:String):ImageHashDTO =
  HashService.getImageHashes(filePath)
// Runs getImageHashes five times for each sample image size and reports the mean per size.
// This is a benchmark only: nothing about the hashes themselves is asserted.
test("Benchmark getImageHashes") {
  info("Benchmarking getImageHashes")

  info("getImageHashes Large Image 3684x2736")
  // Seq.fill re-evaluates its by-name element, so this performs five separate timed runs
  val Seq(large1, large2, large3, large4, large5) =
    Seq.fill(5)(getTime { imageHashTestCase(TestParams.LargeSampleImage1) })
  val largeMean = getMean(large1, large2, large3, large4, large5)
  info(s"The mean time of 5 tests for large was: $largeMean ms")

  info("getImageHashes Medium Image 1824x1368")
  val Seq(medium1, medium2, medium3, medium4, medium5) =
    Seq.fill(5)(getTime { imageHashTestCase(TestParams.MediumSampleImage1) })
  val mediumMean = getMean(medium1, medium2, medium3, medium4, medium5)
  info(s"The mean time of 5 tests for medium was: $mediumMean ms")

  info("getImageHashes Small Image 912x684")
  val Seq(small1, small2, small3, small4, small5) =
    Seq.fill(5)(getTime { imageHashTestCase(TestParams.SmallSampleImage1) })
  val smallMean = getMean(small1, small2, small3, small4, small5)
  info(s"The mean time of 5 tests for small was: $smallMean ms")

  assert(true)
}
// The combined weighted comparison must treat every pairing of the three sizes of
// sample image 1 as similar.
test("ImageHash Of Large, Medium, And Small Sample 1 Must Be Similar") {
  val large = imageHashTestCase(TestParams.LargeSampleImage1)
  val medium = imageHashTestCase(TestParams.MediumSampleImage1)
  val small = imageHashTestCase(TestParams.SmallSampleImage1)

  val pairings = Seq((large, medium), (large, small), (medium, small))
  pairings.foreach { case (first, second) =>
    assert(HashService.areImageHashesSimilar(first, second))
  }
}
// Pins the combined hash code of the large sample image to a known value.
test("Calculate ImageHash Large Sample Image 1") {
  val expectedHashCode = -812836666
  debug("Starting 'Calculate ImageHash Large Sample Image 1' test")
  val actualHashCode = HashService.getImageHashes(TestParams.LargeSampleImage1).hashCode
  debug(s"Testing that $actualHashCode = $expectedHashCode")
  assert(actualHashCode == expectedHashCode)
}
// Pins the combined hash code of the medium sample image to a known value.
test("Calculate ImageHash Medium Sample Image 1") {
  val expectedHashCode = -812836666
  debug("Starting 'Calculate ImageHash Medium Sample Image 1' test")
  val actualHashCode = HashService.getImageHashes(TestParams.MediumSampleImage1).hashCode
  debug(s"Testing that $actualHashCode = $expectedHashCode")
  assert(actualHashCode == expectedHashCode)
}
// Pins the combined hash code of the small sample image to a known value.
test("Calculate ImageHash Small Sample Image 1") {
  val expectedHashCode = -812840762
  debug("Starting 'Calculate ImageHash Small Sample Image 1' test")
  val actualHashCode = HashService.getImageHashes(TestParams.SmallSampleImage1).hashCode
  debug(s"Testing that $actualHashCode = $expectedHashCode")
  assert(actualHashCode == expectedHashCode)
}
} }
Loading…
Cancel
Save