Browse Source

Added MD5 hashing to HashService and ImageHashDTO. Added tests against MD5 hashes. Added library for digests and hashing. Added Engine and test against Engine

master
Drew Short 10 years ago
parent
commit
dd0bccc81e
  1. 5
      pom.xml
  2. 31
      src/main/scala/com/sothr/imagetools/Engine.scala
  3. 11
      src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala
  4. 79
      src/main/scala/com/sothr/imagetools/hash/HashService.scala
  5. 30
      src/main/scala/com/sothr/imagetools/image/Image.scala
  6. 18
      src/main/scala/com/sothr/imagetools/image/ImageFilter.scala
  7. 17
      src/main/scala/com/sothr/imagetools/image/ImageService.scala
  8. 18
      src/main/scala/com/sothr/imagetools/util/Version.scala
  9. 11
      src/test/scala/com/sothr/imagetools/EngineTest.scala
  10. 54
      src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala
  11. 31
      src/test/scala/com/sothr/imagetools/image/ImageFilterTest.scala

5
pom.xml

@ -93,6 +93,11 @@
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.9</version>
</dependency>
</dependencies>
<build>

31
src/main/scala/com/sothr/imagetools/Engine.scala

@ -0,0 +1,31 @@
package com.sothr.imagetools
import com.sothr.imagetools.image.{ImageFilter, Image}
import scala.collection.immutable
import scala.collection.mutable
import java.io.File
import grizzled.slf4j.Logging
/**
 * Created by drew on 1/26/14.
 *
 * Turns a directory on disk into a list of [[Image]] objects, using
 * [[ImageFilter]] to decide which files are images.
 */
class Engine extends Logging{
  /** Filename filter passed to `File.listFiles` to select the image files. */
  val imageFilter:ImageFilter = new ImageFilter()

  /**
   * Builds an Image (via ImageService.getImage) for every file directly inside
   * the given directory that imageFilter accepts.
   *
   * @param directoryPath path of the directory to scan
   * @return one Image per accepted file, or an empty list when the path is not
   *         a directory or the directory cannot be listed
   */
  def getImagesForDirectory(directoryPath:String):List[Image] = {
    val directory:File = new File(directoryPath)
    if (directory.isDirectory) {
      // File.listFiles returns null (not an empty array) when an I/O error
      // occurs, so wrap it before touching the result
      Option(directory.listFiles(imageFilter)) match {
        case Some(files) =>
          debug(s"Found ${files.length} files that are images in directory: $directoryPath")
          files.toList.map(file => ImageService.getImage(file))
        case None =>
          error(s"Unable to list files in directory: $directoryPath")
          List.empty[Image]
      }
    } else {
      error(s"Provided path: $directoryPath is not a directory")
      List.empty[Image]
    }
  }
}

11
src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala

@ -2,11 +2,7 @@ package com.sothr.imagetools.dto
import grizzled.slf4j.Logging
class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) extends Logging {
def getAhash():Long = this.ahash
def getDhash():Long = this.dhash
def getPhash():Long = this.phash
class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long, val md5:String) extends Logging {
override def hashCode():Int = {
var result = 365
@ -16,8 +12,9 @@ class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) extends Loggi
result
}
override def toString():String = {
s"""ahash: $ahash
override def toString:String = {
s"""MD5: $md5
ahash: $ahash
dhash: $dhash
phash: $phash""".stripMargin
}

79
src/main/scala/com/sothr/imagetools/hash/HashService.scala

@ -6,7 +6,8 @@ import com.sothr.imagetools.util.{PropertiesEnum, PropertiesService, Hamming}
import com.sothr.imagetools.ImageService
import java.awt.image.BufferedImage
import javax.imageio.ImageIO
import java.io.File
import java.io.{FileInputStream, File}
import org.apache.commons.codec.digest.DigestUtils
/**
* A service that exposes the ability to construct perceptive hashes from an
@ -18,10 +19,11 @@ object HashService extends Logging {
def getImageHashes(imagePath:String):ImageHashDTO = {
debug(s"Creating hashes for $imagePath")
var ahash:Long = 0L
var dhash:Long = 0L
var phash:Long = 0L
val md5:String = getMD5(imagePath)
//Get Image Data
val grayImage = ImageService.convertToGray(ImageIO.read(new File(imagePath)))
@ -36,7 +38,7 @@ object HashService extends Logging {
phash = getPhash(grayImage, true)
}
val hashes = new ImageHashDTO(ahash, dhash, phash)
val hashes = new ImageHashDTO(ahash, dhash, phash, md5)
debug(s"Generated hashes: $hashes")
return hashes
@ -80,6 +82,10 @@ object HashService extends Logging {
val imageData = ImageService.getImageData(resizedImage)
PHash.getHash(imageData)
}
/**
 * Calculates the MD5 digest of a file's contents.
 *
 * @param filePath path of the file to digest
 * @return the value produced by DigestUtils.md5Hex for the file's bytes
 * @throws java.io.FileNotFoundException if the file cannot be opened
 * @throws java.io.IOException if reading the file fails
 */
def getMD5(filePath:String):String = {
  val stream = new FileInputStream(filePath)
  // DigestUtils.md5Hex reads the stream but leaves it open, so release the
  // file handle here even when digesting fails
  try {
    DigestUtils.md5Hex(stream)
  } finally {
    stream.close()
  }
}
def areAhashSimilar(ahash1:Long, ahash2:Long):Boolean = {
val tolerence = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
@ -102,7 +108,7 @@ object HashService extends Logging {
if (distance <= tolerence) true else false
}
def areImageHashesSimilar(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Boolean = {
def getWeightedHashSimilarity(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Float = {
//ahash
val aHashTolerance = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
val aHashWeight = PropertiesService.get(PropertiesEnum.AhashWeight.toString).toFloat
@ -118,37 +124,78 @@ object HashService extends Logging {
//calculate weighted values
var weightedHammingTotal:Float = 0
var weightedToleranceTotal:Float = 0
var methodsTotal = 0
if (useAhash)
{
val hamming = Hamming.getDistance(imageHash1.getAhash(), imageHash2.getAhash())
val hamming = Hamming.getDistance(imageHash1.ahash, imageHash2.ahash)
weightedHammingTotal += hamming * aHashWeight
weightedToleranceTotal += aHashTolerance * aHashWeight
debug(s"hash1: ${imageHash1.getAhash()} hash2: ${imageHash1.getAhash()} tolerence: $aHashTolerance hamming distance: $hamming weight: $aHashWeight")
debug(s"hash1: ${imageHash1.ahash} hash2: ${imageHash1.ahash} tolerence: $aHashTolerance hamming distance: $hamming weight: $aHashWeight")
methodsTotal+=1
}
if (useDhash)
{
val hamming = Hamming.getDistance(imageHash1.getDhash(), imageHash2.getDhash())
val hamming = Hamming.getDistance(imageHash1.dhash, imageHash2.dhash)
weightedHammingTotal += hamming * dHashWeight
weightedToleranceTotal += dHashTolerance * dHashWeight
debug(s"hash1: ${imageHash1.getDhash()} hash2: ${imageHash1.getDhash()} tolerence: $dHashTolerance hamming distance: $hamming weight: $dHashWeight")
debug(s"hash1: ${imageHash1.dhash} hash2: ${imageHash1.dhash} tolerence: $dHashTolerance hamming distance: $hamming weight: $dHashWeight")
methodsTotal+=1
}
if (usePhash)
{
val hamming = Hamming.getDistance(imageHash1.getPhash(), imageHash2.getPhash())
val hamming = Hamming.getDistance(imageHash1.phash, imageHash2.phash)
weightedHammingTotal += hamming * pHashWeight
weightedToleranceTotal += pHashTolerance * pHashWeight
debug(s"hash1: ${imageHash1.getPhash()} hash2: ${imageHash1.getPhash()} tolerence: $pHashTolerance hamming distance: $hamming weight: $pHashWeight")
debug(s"hash1: ${imageHash1.phash} hash2: ${imageHash1.phash} tolerence: $pHashTolerance hamming distance: $hamming weight: $pHashWeight")
methodsTotal+=1
}
val weightedHammingMean = weightedHammingTotal / methodsTotal
val weightedToleranceMean = weightedToleranceTotal /methodsTotal
debug(s"Weighted Values Are: Hamming: $weightedHammingMean Tolerance: $weightedToleranceMean")
debug(s"Calculated Weighted Hamming Mean: $weightedHammingMean")
weightedHammingMean
}
/**
 * Computes the mean weighted tolerance across the hash methods that are
 * currently enabled in the properties.
 *
 * Every tolerance, weight and enable flag is read up front (ahash, dhash,
 * phash, in that order). For each enabled method, tolerance * weight is added
 * to a running total, and the total is divided by the number of enabled methods.
 *
 * NOTE(review): with every method disabled the final division is 0f / 0.
 *
 * @return the summed weighted tolerances divided by the enabled method count
 */
def getWeightedHashTolerence:Float = {
  // (label, tolerance, weight, enabled) for each hashing method
  val methods = List(
    ("Ahash",
      PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt,
      PropertiesService.get(PropertiesEnum.AhashWeight.toString).toFloat,
      PropertiesService.get(PropertiesEnum.UseAhash.toString).toBoolean),
    ("Dhash",
      PropertiesService.get(PropertiesEnum.DhashTolerance.toString).toInt,
      PropertiesService.get(PropertiesEnum.DhashWeight.toString).toFloat,
      PropertiesService.get(PropertiesEnum.UseDhash.toString).toBoolean),
    ("Phash",
      PropertiesService.get(PropertiesEnum.PhashTolerance.toString).toInt,
      PropertiesService.get(PropertiesEnum.PhashWeight.toString).toFloat,
      PropertiesService.get(PropertiesEnum.UsePhash.toString).toBoolean)
  )
  // fold the enabled methods into (running weighted total, enabled count)
  val (weightedToleranceTotal, methodsTotal) = methods.foldLeft((0f, 0)) {
    case ((runningTotal, count), (label, tolerance, weight, true)) =>
      val updatedTotal = runningTotal + tolerance * weight
      debug(s"$label Tolerance: $tolerance Current Weighted Tolerance: $updatedTotal")
      (updatedTotal, count + 1)
    case (unchanged, _) => unchanged
  }
  val weightedTolerance = weightedToleranceTotal / methodsTotal
  debug(s"Calculated Weighted Tolerance: $weightedTolerance")
  weightedTolerance
}
/**
 * Decides whether two sets of image hashes are similar.
 *
 * @param imageHash1 hashes of the first image
 * @param imageHash2 hashes of the second image
 * @return true when the weighted hamming mean from getWeightedHashSimilarity
 *         is at or below the value from getWeightedHashTolerence
 */
def areImageHashesSimilar(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Boolean = {
  val weightedDistance = getWeightedHashSimilarity(imageHash1, imageHash2)
  val allowedDistance = getWeightedHashTolerence
  weightedDistance <= allowedDistance
}

30
src/main/scala/com/sothr/imagetools/image/Image.scala

@ -2,26 +2,34 @@ package com.sothr.imagetools.image
import scala.collection.Traversable
import com.sothr.imagetools.dto.ImageHashDTO
import com.sothr.imagetools.hash.HashService
abstract class Image(val imagePath:String, val thumbnailPath:String, protected var hashes:ImageHashDTO = null) {
class Image(val imagePath:String, val thumbnailPath:String, var hashes:ImageHashDTO = null) {
protected val imageType:ImageType = ImageType.SingleFrameImage
var imageType:ImageType = ImageType.SingleFrameImage
def getHashes():ImageHashDTO = this.hashes
def setHashes(newHashes:ImageHashDTO) = { this.hashes = newHashes }
def isSimilarTo(otherImage:Image):Boolean = {
HashService.areImageHashesSimilar(this.hashes,otherImage.hashes)
}
def getSimilarity(otherImage:Image):Float = {
HashService.getWeightedHashSimilarity(this.hashes, otherImage.hashes)
}
def isSimilarTo(otherImage:Image):Boolean
/*def getSimilar(otherImages:Traversable[Image]):Traversable[Image] = {
def getSimilarity(otherImage:Image)
}*/
def getSimilar(otherImages:Traversable[Image]):Traversable[Image]
def getPath:String = {
this.imagePath
}
def getPath():String = {
return this.imagePath
def getThumbnailPath:String = {
this.thumbnailPath
}
def getThumbnailPath():String = {
return this.thumbnailPath
override def toString:String = {
s"Image: $imagePath Thumbnail: $thumbnailPath Hashes: $hashes"
}
}

18
src/main/scala/com/sothr/imagetools/image/ImageFilter.scala

@ -0,0 +1,18 @@
package com.sothr.imagetools.image
import java.io.{File, FilenameFilter}
import scala.collection.immutable.HashSet
/**
 * Created by drew on 1/26/14.
 *
 * A FilenameFilter that accepts names whose last dot-separated segment is one
 * of the known image extensions. Matching ignores case, so "photo.JPG" is
 * accepted just like "photo.jpg".
 */
class ImageFilter extends FilenameFilter {
  // Lower-case extensions (without the dot) treated as images
  private val extensions:HashSet[String] = HashSet("png", "bmp", "gif", "jpg", "jpeg")

  /**
   * @param dir  directory containing the file (not consulted)
   * @param name file name to test
   * @return true when the name has an extension found in the image extension set
   */
  def accept(dir: File, name: String): Boolean = {
    val splitName = name.split('.')
    // A name without any dot has no extension; Locale.ROOT keeps the
    // lower-casing independent of the JVM default locale
    val extension =
      if (splitName.length > 1) splitName.last.toLowerCase(java.util.Locale.ROOT) else ""
    extensions.contains(extension)
  }
}

17
src/main/scala/com/sothr/imagetools/image/ImageService.scala

@ -3,9 +3,24 @@ package com.sothr.imagetools
import grizzled.slf4j.Logging
import java.awt.image.{DataBufferByte, BufferedImage, ColorConvertOp}
import net.coobird.thumbnailator.Thumbnails
import java.io.File
import com.sothr.imagetools.image.Image
import com.sothr.imagetools.hash.HashService
object ImageService extends Logging {
/**
 * Builds an Image for a file: resolves its thumbnail path, computes its hashes
 * through HashService and logs the result at debug level.
 *
 * @param file the image file on disk
 * @return the Image holding the file's absolute path, thumbnail path and hashes
 */
def getImage(file:File):Image = {
  val absolutePath = file.getAbsolutePath
  // arguments are evaluated left to right: thumbnail path first, then hashes
  val created = new Image(absolutePath, getThumbnailPath(file), HashService.getImageHashes(absolutePath))
  debug(s"Created image: $created")
  created
}
/**
 * Returns the thumbnail path for an image file.
 *
 * The file argument is currently not consulted; the result is always ".".
 * NOTE(review): this looks like a placeholder — confirm the intended location.
 *
 * @param file the image file a thumbnail path is wanted for
 * @return the constant string "."
 */
def getThumbnailPath(file:File):String = "."
/**
* Get the raw data for an image
*/

18
src/main/scala/com/sothr/imagetools/util/Version.scala

@ -1,15 +1,25 @@
package com.sothr.imagetools.util
import grizzled.slf4j.Logging
import java.lang.NumberFormatException
/**
* Created by drew on 1/6/14.
*/
class Version(val versionString:String) {
class Version(val versionString:String) extends Logging{
//parse version into parts
//typical version string i.e. 0.1.0-DEV-27-060aec7
val (major,minor,patch,buildTag,buildNumber,buildHash) = {
val splitVersion = versionString.split("""\.""")
val splitType = splitVersion(splitVersion.length-1).split("""-""")
(splitVersion(0).toInt,splitVersion(1).toInt,splitType(0).toInt,splitType(1),splitType(2),splitType(3))
var version:Tuple6[Int,Int,Int,String,Int,String] = (0,0,0,"DEV",0,"asdfzxcv")
try {
val splitVersion = versionString.split("""\.""")
val splitType = splitVersion(splitVersion.length-1).split("""-""")
version = (splitVersion(0).toInt,splitVersion(1).toInt,splitType(0).toInt,splitType(1),splitType(2).toInt,splitType(3))
} catch {
case nfe:NumberFormatException => error(s"Error parsing number from version string '$versionString'", nfe)
case e:Exception => error(s"Unexpected error parsing version string '$versionString'", e)
}
version
}
/*

11
src/test/scala/com/sothr/imagetools/EngineTest.scala

@ -0,0 +1,11 @@
package com.sothr.imagetools
/**
 * Created by drew on 1/26/14.
 *
 * Exercises Engine against the "sample" directory.
 */
class EngineTest extends BaseTest{
  test("Test getImagesForDirectory for sample directory") {
    // the "sample" directory is expected to yield exactly three images
    val images = new Engine().getImagesForDirectory("sample")
    assertResult(3) { images.length }
  }
}

54
src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala

@ -256,6 +256,60 @@ class HashServiceTest extends BaseTest {
assert(HashService.arePhashSimilar(mediumHash,smallHash))
}
/** Computes the MD5 of the given file through HashService; used as the timed body of the MD5 benchmark. */
def md5TestCase(filePath:String):String = HashService.getMD5(filePath)
// Times five MD5 runs for each of the large, medium and small sample images
// and reports the mean per size. Informational only: the final assertion
// always passes.
test("Benchmark MD5") {
  info("Benchmarking MD5")
  info("MD5 Large Image 3684x2736")
  val largeTime1 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
  val largeTime2 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
  val largeTime3 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
  val largeTime4 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
  val largeTime5 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
  val largeMean = getMean(largeTime1, largeTime2, largeTime3, largeTime4, largeTime5)
  info(s"The mean time of 5 tests for large was: $largeMean ms")
  info("MD5 Medium Image 1824x1368")
  val mediumTime1 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
  val mediumTime2 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
  val mediumTime3 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
  val mediumTime4 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
  val mediumTime5 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
  val mediumMean = getMean(mediumTime1, mediumTime2, mediumTime3, mediumTime4, mediumTime5)
  info(s"The mean time of 5 tests for medium was: $mediumMean ms")
  info("MD5 Small Image 912x684")
  val smallTime1 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
  val smallTime2 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
  val smallTime3 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
  val smallTime4 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
  val smallTime5 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
  val smallMean = getMean(smallTime1, smallTime2, smallTime3, smallTime4, smallTime5)
  info(s"The mean time of 5 tests for small was: $smallMean ms")
  assert(true)
}
// Pins the MD5 of the large sample image to its known digest.
test("Calculate MD5 Large Sample Image 1") {
  val expected = "3fbccfd5faf3f991435b827ee5961862"
  debug("Starting 'Calculate MD5 Large Sample Image 1' test")
  val actual = HashService.getMD5(TestParams.LargeSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
// Pins the MD5 of the medium sample image to its known digest.
test("Calculate MD5 Medium Sample Image 1") {
  val expected = "a95e2cc4610307eb957e9c812429c53e"
  debug("Starting 'Calculate MD5 Medium Sample Image 1' test")
  val actual = HashService.getMD5(TestParams.MediumSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
// Pins the MD5 of the small sample image to its known digest.
test("Calculate MD5 Small Sample Image 1") {
  val expected = "b137131bd55896c747286e4d247b845e"
  debug("Starting 'Calculate MD5 Small Sample Image 1' test")
  val actual = HashService.getMD5(TestParams.SmallSampleImage1)
  debug(s"Testing that $actual = $expected")
  assert(actual == expected)
}
/** Generates the full hash set for the given file through HashService. */
def imageHashTestCase(filePath:String):ImageHashDTO = HashService.getImageHashes(filePath)

31
src/test/scala/com/sothr/imagetools/image/ImageFilterTest.scala

@ -0,0 +1,31 @@
package com.sothr.imagetools.image
import com.sothr.imagetools.BaseTest
import java.io.File
/**
 * Created by drew on 1/26/14.
 *
 * Checks which file names ImageFilter accepts and which it rejects.
 */
class ImageFilterTest extends BaseTest{
  test("Confirm ImageFilter Works") {
    val filter:ImageFilter = new ImageFilter()
    // the directory argument is not consulted by ImageFilter.accept
    val bogusDirectory = new File(".")
    val imageNames = List("test.png", "test.bmp", "test.gif", "test.jpg", "test.jpeg", "test.jpeg.jpg")
    for (name <- imageNames) {
      // the clue names the offending file when an assertion fails
      assert(filter.accept(bogusDirectory, name), s"expected '$name' to be accepted")
    }
  }
  test("Confirm ImageFilter Fails") {
    val filter:ImageFilter = new ImageFilter()
    val bogusDirectory = new File(".")
    val otherNames = List("test", "test.mp4", "test.gif.mp4", "")
    for (name <- otherNames) {
      assert(!filter.accept(bogusDirectory, name), s"expected '$name' to be rejected")
    }
  }
}
Loading…
Cancel
Save