Adds MD5 hashing to HashService, a directory-scanning Engine with an ImageFilter,
and turns Image into a concrete class.

Review amendments folded into this patch:
  * HashService.getMD5 closes the FileInputStream it opens.
  * HashService debug output prints imageHash2 for "hash2" instead of repeating imageHash1.
  * Engine.getImagesForDirectory tolerates File.listFiles returning null.
  * ImageFilter matches extensions case-insensitively.
  * Test labels corrected ("PHash ..." -> "MD5 ...", "ImageFiler" -> "ImageFilter").

Notes on this copy:
  * pom.xml markup is reconstructed; its trailing context lines were not recoverable
    and are omitted, so that hunk header is shortened accordingly.
  * Blob ids on the index lines are those of the original patch and were not recomputed.

diff --git a/pom.xml b/pom.xml
index 469ae9f..085b4db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -93,3 +93,8 @@
       <artifactId>commons-cli</artifactId>
       <version>1.2</version>
     </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.9</version>
+    </dependency>
diff --git a/src/main/scala/com/sothr/imagetools/Engine.scala b/src/main/scala/com/sothr/imagetools/Engine.scala
new file mode 100644
index 0000000..7b7d66f
--- /dev/null
+++ b/src/main/scala/com/sothr/imagetools/Engine.scala
@@ -0,0 +1,35 @@
+package com.sothr.imagetools
+
+import com.sothr.imagetools.image.{ImageFilter, Image}
+import scala.collection.immutable
+import scala.collection.mutable
+import java.io.File
+import grizzled.slf4j.Logging
+
+/**
+ * Created by drew on 1/26/14.
+ */
+class Engine extends Logging{
+
+  val imageFilter:ImageFilter = new ImageFilter()
+
+  /**
+   * Builds an Image for every file directly inside the directory that imageFilter accepts.
+   *
+   * @param directoryPath path of the directory to scan
+   * @return the images found; empty when the path is not a directory or cannot be listed
+   */
+  def getImagesForDirectory(directoryPath:String):List[Image] = {
+    val directory:File = new File(directoryPath)
+    if (directory.isDirectory) {
+      // listFiles returns null when an I/O error occurs, so guard before dereferencing
+      val files = Option(directory.listFiles(imageFilter)).getOrElse(Array.empty[File])
+      debug(s"Found ${files.length} files that are images in directory: $directoryPath")
+      files.toList.map(file => ImageService.getImage(file))
+    } else {
+      error(s"Provided path: $directoryPath is not a directory")
+      List.empty[Image]
+    }
+  }
+
+}
diff --git a/src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala b/src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala
index 7812825..7e1d398 100644
--- a/src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala
+++ b/src/main/scala/com/sothr/imagetools/dto/ImageHashDTO.scala
@@ -2,11 +2,7 @@ package com.sothr.imagetools.dto
 
 import grizzled.slf4j.Logging
 
-class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) extends Logging {
-
-  def getAhash():Long = this.ahash
-  def getDhash():Long = this.dhash
-  def getPhash():Long = this.phash
+class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long, val md5:String) extends Logging {
 
   override def hashCode():Int = {
     var result = 365
@@ -16,8 +12,9 @@ class ImageHashDTO(val ahash:Long, val dhash:Long, val phash:Long) extends Loggi
     result
   }
 
-  override def toString():String = {
-    s"""ahash: $ahash
+  override def toString:String = {
+    s"""MD5: $md5
+       ahash: $ahash
        dhash: $dhash
        phash: $phash""".stripMargin
   }
diff --git a/src/main/scala/com/sothr/imagetools/hash/HashService.scala b/src/main/scala/com/sothr/imagetools/hash/HashService.scala
index 52429ce..ff61774 100644
--- a/src/main/scala/com/sothr/imagetools/hash/HashService.scala
+++ b/src/main/scala/com/sothr/imagetools/hash/HashService.scala
@@ -6,7 +6,8 @@ import com.sothr.imagetools.util.{PropertiesEnum, PropertiesService, Hamming}
 import com.sothr.imagetools.ImageService
 import java.awt.image.BufferedImage
 import javax.imageio.ImageIO
-import java.io.File
+import java.io.{FileInputStream, File}
+import org.apache.commons.codec.digest.DigestUtils
 
 /**
  * A service that exposes the ability to construct perceptive hashes from an
@@ -18,10 +19,11 @@ object HashService extends Logging {
 
   def getImageHashes(imagePath:String):ImageHashDTO = {
     debug(s"Creating hashes for $imagePath")
-    
+
     var ahash:Long = 0L
     var dhash:Long = 0L
     var phash:Long = 0L
+    val md5:String = getMD5(imagePath)
 
     //Get Image Data
     val grayImage = ImageService.convertToGray(ImageIO.read(new File(imagePath)))
@@ -36,7 +38,7 @@ object HashService extends Logging {
       phash = getPhash(grayImage, true)
     }
 
-    val hashes = new ImageHashDTO(ahash, dhash, phash)
+    val hashes = new ImageHashDTO(ahash, dhash, phash, md5)
     debug(s"Generated hashes: $hashes")
 
     return hashes
@@ -80,6 +82,18 @@ object HashService extends Logging {
     val imageData = ImageService.getImageData(resizedImage)
     PHash.getHash(imageData)
   }
+
+  /**
+   * Computes the MD5 digest of a file's contents.
+   *
+   * @param filePath path of the file to digest
+   * @return the digest as a hex string
+   */
+  def getMD5(filePath:String):String = {
+    val stream = new FileInputStream(filePath)
+    // md5Hex only reads the stream; close it here so the file handle is not leaked
+    try DigestUtils.md5Hex(stream) finally stream.close()
+  }
 
   def areAhashSimilar(ahash1:Long, ahash2:Long):Boolean = {
     val tolerence = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
@@ -102,7 +116,16 @@ object HashService extends Logging {
     if (distance <= tolerence) true else false
   }
 
-  def areImageHashesSimilar(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Boolean = {
+  /**
+   * Combines the hamming distances of every enabled hash method into one score.
+   * Each distance is multiplied by the method's configured weight and the sum is
+   * divided by the number of enabled methods; the result is NaN when none are enabled.
+   *
+   * @param imageHash1 hashes of the first image
+   * @param imageHash2 hashes of the second image
+   * @return the weighted mean hamming distance, lower meaning more alike
+   */
+  def getWeightedHashSimilarity(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Float = {
     //ahash
     val aHashTolerance = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
     val aHashWeight = PropertiesService.get(PropertiesEnum.AhashWeight.toString).toFloat
@@ -118,37 +141,89 @@ object HashService extends Logging {
 
     //calculate weighted values
     var weightedHammingTotal:Float = 0
-    var weightedToleranceTotal:Float = 0
     var methodsTotal = 0
 
     if (useAhash)
     {
-      val hamming = Hamming.getDistance(imageHash1.getAhash(), imageHash2.getAhash())
+      val hamming = Hamming.getDistance(imageHash1.ahash, imageHash2.ahash)
       weightedHammingTotal += hamming * aHashWeight
-      weightedToleranceTotal += aHashTolerance * aHashWeight
-      debug(s"hash1: ${imageHash1.getAhash()} hash2: ${imageHash1.getAhash()} tolerence: $aHashTolerance hamming distance: $hamming weight: $aHashWeight")
+      debug(s"hash1: ${imageHash1.ahash} hash2: ${imageHash2.ahash} tolerence: $aHashTolerance hamming distance: $hamming weight: $aHashWeight")
       methodsTotal+=1
     }
     if (useDhash)
     {
-      val hamming = Hamming.getDistance(imageHash1.getDhash(), imageHash2.getDhash())
+      val hamming = Hamming.getDistance(imageHash1.dhash, imageHash2.dhash)
       weightedHammingTotal += hamming * dHashWeight
-      weightedToleranceTotal += dHashTolerance * dHashWeight
-      debug(s"hash1: ${imageHash1.getDhash()} hash2: ${imageHash1.getDhash()} tolerence: $dHashTolerance hamming distance: $hamming weight: $dHashWeight")
+      debug(s"hash1: ${imageHash1.dhash} hash2: ${imageHash2.dhash} tolerence: $dHashTolerance hamming distance: $hamming weight: $dHashWeight")
       methodsTotal+=1
     }
     if (usePhash)
     {
-      val hamming = Hamming.getDistance(imageHash1.getPhash(), imageHash2.getPhash())
+      val hamming = Hamming.getDistance(imageHash1.phash, imageHash2.phash)
       weightedHammingTotal += hamming * pHashWeight
-      weightedToleranceTotal += pHashTolerance * pHashWeight
-      debug(s"hash1: ${imageHash1.getPhash()} hash2: ${imageHash1.getPhash()} tolerence: $pHashTolerance hamming distance: $hamming weight: $pHashWeight")
+      debug(s"hash1: ${imageHash1.phash} hash2: ${imageHash2.phash} tolerence: $pHashTolerance hamming distance: $hamming weight: $pHashWeight")
       methodsTotal+=1
     }
 
     val weightedHammingMean = weightedHammingTotal / methodsTotal
-    val weightedToleranceMean = weightedToleranceTotal /methodsTotal
-    debug(s"Weighted Values Are: Hamming: $weightedHammingMean Tolerance: $weightedToleranceMean")
+    debug(s"Calculated Weighted Hamming Mean: $weightedHammingMean")
+    weightedHammingMean
+  }
+
+  /**
+   * Combines the configured tolerances of every enabled hash method, weighted the
+   * same way as getWeightedHashSimilarity so the two values can be compared.
+   *
+   * @return the weighted mean tolerance, or NaN when no hash method is enabled
+   */
+  def getWeightedHashTolerence:Float = {
+    //ahash
+    val aHashTolerance = PropertiesService.get(PropertiesEnum.AhashTolerance.toString).toInt
+    val aHashWeight = PropertiesService.get(PropertiesEnum.AhashWeight.toString).toFloat
+    val useAhash = PropertiesService.get(PropertiesEnum.UseAhash.toString).toBoolean
+    //dhash
+    val dHashTolerance = PropertiesService.get(PropertiesEnum.DhashTolerance.toString).toInt
+    val dHashWeight = PropertiesService.get(PropertiesEnum.DhashWeight.toString).toFloat
+    val useDhash = PropertiesService.get(PropertiesEnum.UseDhash.toString).toBoolean
+    //phash
+    val pHashTolerance = PropertiesService.get(PropertiesEnum.PhashTolerance.toString).toInt
+    val pHashWeight = PropertiesService.get(PropertiesEnum.PhashWeight.toString).toFloat
+    val usePhash = PropertiesService.get(PropertiesEnum.UsePhash.toString).toBoolean
+    //calculate weighted values
+    var weightedToleranceTotal:Float = 0
+    var methodsTotal = 0
+
+    if (useAhash)
+    {
+      weightedToleranceTotal += aHashTolerance * aHashWeight
+      debug(s"Ahash Tolerance: $aHashTolerance Current Weighted Tolerance: $weightedToleranceTotal")
+      methodsTotal+=1
+    }
+    if (useDhash)
+    {
+      weightedToleranceTotal += dHashTolerance * dHashWeight
+      debug(s"Dhash Tolerance: $dHashTolerance Current Weighted Tolerance: $weightedToleranceTotal")
+      methodsTotal+=1
+    }
+    if (usePhash)
+    {
+      weightedToleranceTotal += pHashTolerance * pHashWeight
+      debug(s"Phash Tolerance: $pHashTolerance Current Weighted Tolerance: $weightedToleranceTotal")
+      methodsTotal+=1
+    }
+    val weightedTolerance = weightedToleranceTotal / methodsTotal
+    debug(s"Calculated Weighted Tolerance: $weightedTolerance")
+    weightedTolerance
+  }
+
+  /**
+   * Decides whether two sets of hashes describe similar images.
+   *
+   * @return true when the weighted hamming mean is within the weighted tolerance
+   */
+  def areImageHashesSimilar(imageHash1:ImageHashDTO, imageHash2:ImageHashDTO):Boolean = {
+    val weightedHammingMean = getWeightedHashSimilarity(imageHash1, imageHash2)
+    val weightedToleranceMean = getWeightedHashTolerence
 
     if (weightedHammingMean <= weightedToleranceMean) true else false
   }
diff --git a/src/main/scala/com/sothr/imagetools/image/Image.scala b/src/main/scala/com/sothr/imagetools/image/Image.scala
index ccec328..91b186f 100644
--- a/src/main/scala/com/sothr/imagetools/image/Image.scala
+++ b/src/main/scala/com/sothr/imagetools/image/Image.scala
@@ -2,26 +2,34 @@ package com.sothr.imagetools.image
 
 import scala.collection.Traversable
 import com.sothr.imagetools.dto.ImageHashDTO
+import com.sothr.imagetools.hash.HashService
 
-abstract class Image(val imagePath:String, val thumbnailPath:String, protected var hashes:ImageHashDTO = null) {
+class Image(val imagePath:String, val thumbnailPath:String, var hashes:ImageHashDTO = null) {
 
-  protected val imageType:ImageType = ImageType.SingleFrameImage
+  var imageType:ImageType = ImageType.SingleFrameImage
 
-  def getHashes():ImageHashDTO = this.hashes
-  def setHashes(newHashes:ImageHashDTO) = { this.hashes = newHashes }
+  def isSimilarTo(otherImage:Image):Boolean = {
+    HashService.areImageHashesSimilar(this.hashes,otherImage.hashes)
+  }
+
+  def getSimilarity(otherImage:Image):Float = {
+    HashService.getWeightedHashSimilarity(this.hashes, otherImage.hashes)
+  }
 
-  def isSimilarTo(otherImage:Image):Boolean
+  /*def getSimilar(otherImages:Traversable[Image]):Traversable[Image] = {
 
-  def getSimilarity(otherImage:Image)
+  }*/
 
-  def getSimilar(otherImages:Traversable[Image]):Traversable[Image]
+  def getPath:String = {
+    this.imagePath
+  }
 
-  def getPath():String = {
-    return this.imagePath
+  def getThumbnailPath:String = {
+    this.thumbnailPath
   }
 
-  def getThumbnailPath():String = {
-    return this.thumbnailPath
+  override def toString:String = {
+    s"Image: $imagePath Thumbnail: $thumbnailPath Hashes: $hashes"
   }
 
 }
diff --git a/src/main/scala/com/sothr/imagetools/image/ImageFilter.scala b/src/main/scala/com/sothr/imagetools/image/ImageFilter.scala
new file mode 100644
index 0000000..bfab988
--- /dev/null
+++ b/src/main/scala/com/sothr/imagetools/image/ImageFilter.scala
@@ -0,0 +1,26 @@
+package com.sothr.imagetools.image
+
+import java.io.{File, FilenameFilter}
+import scala.collection.immutable.HashSet
+
+/**
+ * Created by drew on 1/26/14.
+ */
+class ImageFilter extends FilenameFilter {
+
+  private val extensions:HashSet[String] = new HashSet[String]() ++ Array("png", "bmp", "gif", "jpg", "jpeg")
+
+  /**
+   * Accepts a file when the text after its last '.' is a known image extension,
+   * ignoring case so names such as "PHOTO.JPG" are matched as well.
+   *
+   * @param dir  the directory containing the file (not inspected)
+   * @param name the file name to test
+   * @return true when the name ends in one of the supported image extensions
+   */
+  def accept(dir: File, name: String): Boolean = {
+    val splitName = name.split('.')
+    val extension = if (splitName.length > 1) splitName(splitName.length-1).toLowerCase(java.util.Locale.ROOT) else ""
+    extensions.contains(extension)
+  }
+}
diff --git a/src/main/scala/com/sothr/imagetools/image/ImageService.scala b/src/main/scala/com/sothr/imagetools/image/ImageService.scala
index c6b45fe..3796fb1 100644
--- a/src/main/scala/com/sothr/imagetools/image/ImageService.scala
+++ b/src/main/scala/com/sothr/imagetools/image/ImageService.scala
@@ -3,9 +3,24 @@ package com.sothr.imagetools
 import grizzled.slf4j.Logging
 import java.awt.image.{DataBufferByte, BufferedImage, ColorConvertOp}
 import net.coobird.thumbnailator.Thumbnails
+import java.io.File
+import com.sothr.imagetools.image.Image
+import com.sothr.imagetools.hash.HashService
 
 object ImageService extends Logging {
-  
+
+  def getImage(file:File):Image = {
+    val thumbnailPath = getThumbnailPath(file)
+    val hashes = HashService.getImageHashes(file.getAbsolutePath)
+    val image = new Image(file.getAbsolutePath, thumbnailPath, hashes)
+    debug(s"Created image: $image")
+    image
+  }
+
+  def getThumbnailPath(file:File):String = {
+    "."
+  }
+
   /**
    * Get the raw data for an image
    */
diff --git a/src/main/scala/com/sothr/imagetools/util/Version.scala b/src/main/scala/com/sothr/imagetools/util/Version.scala
index 7b07742..e750077 100644
--- a/src/main/scala/com/sothr/imagetools/util/Version.scala
+++ b/src/main/scala/com/sothr/imagetools/util/Version.scala
@@ -1,15 +1,25 @@
 package com.sothr.imagetools.util
 
+import grizzled.slf4j.Logging
+import java.lang.NumberFormatException
+
 /**
  * Created by drew on 1/6/14.
  */
-class Version(val versionString:String) {
+class Version(val versionString:String) extends Logging{
   //parse version into parts
   //typical version string i.e. 0.1.0-DEV-27-060aec7
   val (major,minor,patch,buildTag,buildNumber,buildHash) = {
-    val splitVersion = versionString.split("""\.""")
-    val splitType = splitVersion(splitVersion.length-1).split("""-""")
-    (splitVersion(0).toInt,splitVersion(1).toInt,splitType(0).toInt,splitType(1),splitType(2),splitType(3))
+    var version:Tuple6[Int,Int,Int,String,Int,String] = (0,0,0,"DEV",0,"asdfzxcv")
+    try {
+      val splitVersion = versionString.split("""\.""")
+      val splitType = splitVersion(splitVersion.length-1).split("""-""")
+      version = (splitVersion(0).toInt,splitVersion(1).toInt,splitType(0).toInt,splitType(1),splitType(2).toInt,splitType(3))
+    } catch {
+      case nfe:NumberFormatException => error(s"Error parsing number from version string '$versionString'", nfe)
+      case e:Exception => error(s"Unexpected error parsing version string '$versionString'", e)
+    }
+    version
   }
 
   /*
diff --git a/src/test/scala/com/sothr/imagetools/EngineTest.scala b/src/test/scala/com/sothr/imagetools/EngineTest.scala
new file mode 100644
index 0000000..4682b15
--- /dev/null
+++ b/src/test/scala/com/sothr/imagetools/EngineTest.scala
@@ -0,0 +1,11 @@
+package com.sothr.imagetools
+
+/**
+ * Created by drew on 1/26/14.
+ */
+class EngineTest extends BaseTest{
+  test("Test getImagesForDirectory for sample directory") {
+    val engine:Engine = new Engine()
+    assertResult(3) { engine.getImagesForDirectory("sample").length }
+  }
+}
diff --git a/src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala b/src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala
index 22fc698..d426b3d 100644
--- a/src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala
+++ b/src/test/scala/com/sothr/imagetools/hash/HashServiceTest.scala
@@ -256,6 +256,60 @@ class HashServiceTest extends BaseTest {
     assert(HashService.arePhashSimilar(mediumHash,smallHash))
   }
 
+  def md5TestCase(filePath:String):String = {
+    HashService.getMD5(filePath)
+  }
+
+  test("Benchmark MD5") {
+    info("Benchmarking MD5")
+    info("MD5 Large Image 3684x2736")
+    val largeTime1 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
+    val largeTime2 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
+    val largeTime3 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
+    val largeTime4 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
+    val largeTime5 = getTime { md5TestCase(TestParams.LargeSampleImage1) }
+    val largeMean = getMean(largeTime1, largeTime2, largeTime3, largeTime4, largeTime5)
+    info(s"The mean time of 5 tests for large was: $largeMean ms")
+    info("MD5 Medium Image 1824x1368")
+    val mediumTime1 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
+    val mediumTime2 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
+    val mediumTime3 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
+    val mediumTime4 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
+    val mediumTime5 = getTime { md5TestCase(TestParams.MediumSampleImage1) }
+    val mediumMean = getMean(mediumTime1, mediumTime2, mediumTime3, mediumTime4, mediumTime5)
+    info(s"The mean time of 5 tests for medium was: $mediumMean ms")
+    info("MD5 Small Image 912x684")
+    val smallTime1 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
+    val smallTime2 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
+    val smallTime3 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
+    val smallTime4 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
+    val smallTime5 = getTime { md5TestCase(TestParams.SmallSampleImage1) }
+    val smallMean = getMean(smallTime1, smallTime2, smallTime3, smallTime4, smallTime5)
+    info(s"The mean time of 5 tests for small was: $smallMean ms")
+    assert(true)
+  }
+
+  test("Calculate MD5 Large Sample Image 1") {
+    debug("Starting 'Calculate MD5 Large Sample Image 1' test")
+    val hash = HashService.getMD5(TestParams.LargeSampleImage1)
+    debug(s"Testing that $hash = 3fbccfd5faf3f991435b827ee5961862")
+    assert(hash == "3fbccfd5faf3f991435b827ee5961862")
+  }
+
+  test("Calculate MD5 Medium Sample Image 1") {
+    debug("Starting 'Calculate MD5 Medium Sample Image 1' test")
+    val hash = HashService.getMD5(TestParams.MediumSampleImage1)
+    debug(s"Testing that $hash = a95e2cc4610307eb957e9c812429c53e")
+    assert(hash == "a95e2cc4610307eb957e9c812429c53e")
+  }
+
+  test("Calculate MD5 Small Sample Image 1") {
+    debug("Starting 'Calculate MD5 Small Sample Image 1' test")
+    val hash = HashService.getMD5(TestParams.SmallSampleImage1)
+    debug(s"Testing that $hash = b137131bd55896c747286e4d247b845e")
+    assert(hash == "b137131bd55896c747286e4d247b845e")
+  }
+
   def imageHashTestCase(filePath:String):ImageHashDTO = {
     HashService.getImageHashes(filePath)
   }
diff --git a/src/test/scala/com/sothr/imagetools/image/ImageFilterTest.scala b/src/test/scala/com/sothr/imagetools/image/ImageFilterTest.scala
new file mode 100644
index 0000000..f8857a1
--- /dev/null
+++ b/src/test/scala/com/sothr/imagetools/image/ImageFilterTest.scala
@@ -0,0 +1,32 @@
+package com.sothr.imagetools.image
+
+import com.sothr.imagetools.BaseTest
+import java.io.File
+
+/**
+ * Created by drew on 1/26/14.
+ */
+class ImageFilterTest extends BaseTest{
+
+  test("Confirm ImageFilter Works") {
+    val filter:ImageFilter = new ImageFilter()
+    val bogusDirectory = new File(".")
+    assert(filter.accept(bogusDirectory, "test.png"))
+    assert(filter.accept(bogusDirectory, "test.bmp"))
+    assert(filter.accept(bogusDirectory, "test.gif"))
+    assert(filter.accept(bogusDirectory, "test.jpg"))
+    assert(filter.accept(bogusDirectory, "test.jpeg"))
+    assert(filter.accept(bogusDirectory, "test.jpeg.jpg"))
+    assert(filter.accept(bogusDirectory, "TEST.PNG"))
+  }
+
+  test("Confirm ImageFilter Fails") {
+    val filter:ImageFilter = new ImageFilter()
+    val bogusDirectory = new File(".")
+    assertResult(false) { filter.accept(bogusDirectory,"test") }
+    assertResult(false) { filter.accept(bogusDirectory,"test.mp4") }
+    assertResult(false) { filter.accept(bogusDirectory,"test.gif.mp4") }
+    assertResult(false) { filter.accept(bogusDirectory,"") }
+  }
+
+}