Eu escrevi o seguinte script com R:
#!/usr/bin/Rscript
makeSureInstalled<-function(package) {if (length(grep(paste("^",package,"$",sep=""),noquote(installed.packages())[,1]))==0) install.packages(package)
library(package=package,character.only=TRUE)
}
makeSureInstalled("argparse")
makeSureInstalled("foreach")
makeSureInstalled("doMC")
arghandling<-function()
# Reads command line arguments, verifies the input
{
parser <- ArgumentParser(description="Reads all files in the specified directory (not-recursively) treats them as jpegs, and groups them by mean color.",
epilog="Example: ./wallpapers-grouping.R --cluster-count 9 --out-prefix 'cluster '")
parser$add_argument("--cluster-count","-n", type="integer", dest="clustercount",
help="Number of groups to do. Must be (much) smaller than number of individual photos.")
parser$add_argument("--feature-set", "-f", dest="mode", type="integer",
help="Before running the actual clustering, the program pre-processes image and extracts features. There are 2 feature sets supported: 1 - one pixel (3 numbers for each colour). 2 - 4 pixel (image is divided into equal 4 parts, and each part is averaged to one pixel. 4x3=12 numbers in total.)")
parser$add_argument("--out-prefix","-o", type="character", default='cluster ', dest='prefix',
help="The prefix appended to each grouped picture before grouping. Can be a directory name (absolute or relative) or name prefix.")
parser$add_argument("--cpu-count", type="integer", dest="cpu",
help="How many cores to use. Defaults to all available cores.")
parser$add_argument("input", type="character",
help="Input directory. Defaults to current directory. ")
args<-commandArgs(trailingOnly = TRUE)
if (length(args)==0)
{
parser$print_help()
quit()
} else
{
cat("Welcome to the wallpapers-grouping.R program.\n")
args=parser$parse_args(args)
}
if (is.null(args$clustercount))
{
cat(paste0("Error! You must specify number of clusters with --cluster-count or -n argument.\n"))
quit()
}
if (is.null(args$input))
{
args$input<-cwd()
} else {
if(!file.exists(args$input))
{
cat(paste0("Cannot find the input directory (", args$input,")\n"))
quit()
}
}
photos=dir(path=args$input)
cat(paste0(length(photos)," files found in ",args$input,".\n"))
if (length(photos)<=args$clustercount)
{
cat(paste0("Number of clusters must be (much) smaller than number of photos.\n"))
quit()
}
registerDoMC()
howManyCores<-getDoParWorkers()
if(is.null(args$cpu) || args$cpu==howManyCores)
{
cat(paste0(howManyCores," cores detected.\n"))
} else {
cat(paste0(howManyCores," cores detected, but will use ",args$cpu," independent threads.\n"))
howManyCores<-args$cpu
}
registerDoMC(cores=howManyCores)
if(is.null(args$mode))
{
args$mode<-1
}
args$mode<-switch(args$mode,1,2)
list(args=args,photos=photos)
}
ah<-arghandling()
photos<-ah$photos
args<-ah$args
makeSureInstalled('jpeg')
makeSureInstalled('plyr')
processphoto<-function(filename,mode=1) {
photo=readJPEG(filename)
if(mode==1)
{
apply(photo,3,mean)
}else {
dims<-dim(obraz)[1:2]
gr=round(dims/2)
o11<-photo[1:gr[[1]],1:gr[[2]],]
o12<-photo[(gr[[1]]+1):dims[[1]],1:gr[[2]],]
o21<-photo[1:gr[[1]],(gr[[2]]+1):dims[[2]],]
o22<-photo[(gr[[1]]+1):dims[[1]],(gr[[2]]+1):dims[[2]],]
o11<-apply(o11,3,mean)
o12<-apply(o12,3,mean)
o21<-apply(o21,3,mean)
o22<-apply(o22,3,mean)
c(o11,o12,o21,o22)
}
}
cat(paste0('Extracting features from all ',length(photos),' pictures...'))
results<-aaply(photos,1,processphoto,.parallel=TRUE,mode=args$mode)
cat(paste0('Grouping them into ',length(args$clustercount),' groups using k-means...'))
clusters<-kmeans(results,args$clustercount)
copycluster <- function(which.cluster) {
files<-photos[clusters$cluster==which.cluster]
todir <- paste0(args$prefix,which.cluster)
if (!isTRUE(file.info(todir)$isdir)) dir.create(todir, recursive=TRUE)
file.copy(from = files, to = paste0(todir,'/',files ))
}
cat(paste0("Copying photos into their groups..."))
a_ply(1:args$clustercount,1,copycluster)
Os scripts instalam as bibliotecas necessárias. No Linux / Mac, ele usará todos os núcleos disponíveis (no Windows, infelizmente não pode).
Deve funcionar imediatamente em qualquer sistema, e no Linux / Mac, obrigado pelo shebang, ele pode ser executado diretamente do terminal.