diff --git a/rescience_paper/.gitignore b/rescience_paper/.gitignore new file mode 100644 index 00000000..ddfeabfe --- /dev/null +++ b/rescience_paper/.gitignore @@ -0,0 +1,12 @@ +# ignore everything, but the stuff below + +/* + +!/graphics +!/sections +!/bibliography.bib +!/content.tex +!/download.py +!/metadata.yaml +!/preamble.tex +!/README.md diff --git a/rescience_paper/README.md b/rescience_paper/README.md new file mode 100644 index 00000000..2e0b5b7a --- /dev/null +++ b/rescience_paper/README.md @@ -0,0 +1,17 @@ +# generate PDF + +```bash +$ python download.py +$ make +$ biber article.tex +$ touch article.tex +$ make +$ touch article.tex +$ make +``` + +# clean + +```bash +git clean -dffx +``` \ No newline at end of file diff --git a/rescience_paper/bibliography.bib b/rescience_paper/bibliography.bib new file mode 100644 index 00000000..b8f20573 --- /dev/null +++ b/rescience_paper/bibliography.bib @@ -0,0 +1,796 @@ +% Encoding: UTF-8 +@STRING{IEEE_J_AC = "{IEEE} Trans. Autom. Control"} + + + +components, packaging and manufacturing +@STRING{IEEE_J_ADVP = "{IEEE} Trans. Adv. Packag."} + +IEEEabrv.bib +V1.12 (2007/01/11) +Copyright (c) 2002-2007 by Michael Shell +See: http://www.michaelshell.org/ +for current contact information. + +BibTeX bibliography string definitions of the ABBREVIATED titles of +IEEE journals and magazines and online publications. + +This file is designed for bibliography styles that require +abbreviated titles and is not for use in bibliographies that +require full-length titles. + +Support sites: +http://www.michaelshell.org/tex/ieeetran/ +http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/ +and/or +http://www.ieee.org/ + +Special thanks to Laura Hyslop and ken Rawson of IEEE for their help +in obtaining the information needed to compile this file. Also, +Volker Kuhlmann and Moritz Borgmann kindly provided some corrections +and additions. + +************************************************************************* +Legal Notice: +This code is offered as-is without any warranty either expressed or +implied; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE! +User assumes all risk. +In no event shall IEEE or any contributor to this code be liable for +any damages or losses, including, but not limited to, incidental, +consequential, or any other damages, resulting from the use or misuse +of any information contained here. + +All comments are the opinions of their respective authors and are not +necessarily endorsed by the IEEE. + +This work is distributed under the LaTeX Project Public License (LPPL) +( http://www.latex-project.org/ ) version 1.3, and may be freely used, +distributed and modified. A copy of the LPPL, version 1.3, is included +in the base LaTeX documentation of all distributions of LaTeX released +2003/12/01 or later. +Retain all contribution notices and credits. +** Modified files should be clearly indicated as such, including ** +** renaming them and changing author support contact information. ** + +File list of work: IEEEabrv.bib, IEEEfull.bib, IEEEexample.bib, + IEEEtran.bst, IEEEtranS.bst, IEEEtranSA.bst, + IEEEtranN.bst, IEEEtranSN.bst, IEEEtran_bst_HOWTO.pdf +************************************************************************* + + +USAGE: + +\bibliographystyle{mybstfile} +\bibliography{IEEEabrv,mybibfile} + +where the IEEE titles in the .bib database entries use the strings +defined here. e.g., + + + journal = IEEE_J_AC, + + +to yield "{IEEE} Trans. Automat. Contr." 
+ + +IEEE uses abbreviated journal titles in their bibliographies - +this file is suitable for work that is to be submitted to the IEEE. + + +For work that requires full-length titles, you should use the full +titles provided in the companion file, IEEEfull.bib. + + +** NOTES ** + + 1. Journals have been grouped according to subject in order to make it + easier to locate and extract the definitions for related journals - + as most works use references that are confined to a single topic. + Magazines are listed in straight alphabetical order. + + 2. String names are closely based on IEEE's own internal acronyms. + + 3. Abbreviations follow IEEE's style. + + 4. Older, out-of-print IEEE titles are included (but not including titles + dating prior to IEEE's formation from the IRE and AIEE in 1963). + + 5. The following NEW/current journal definitions have been disabled because + their abbreviations have not yet been verified: + + STRING{IEEE_J_CBB = "{IEEE/ACM} Trans. Comput. Biology Bioinformatics"} + STRING{IEEE_J_CJECE = "Canadian J. Elect. Comput. Eng."} + STRING{IEEE_J_DSC = "{IEEE} Trans. Dependable Secure Comput."} + STRING{IEEE_O_DSO = "{IEEE} Distrib. Syst. Online"} + + 6. The following OLD journal definitions have been disabled because + their abbreviations have not yet been found/verified: + + STRING{IEEE_J_BCTV = "{IEEE} Trans. Broadcast Television Receivers"} + STRING{IEEE_J_EWS = "{IEEE} Trans. Eng. Writing Speech"} + +If you know what the proper abbreviation is for a string in #5 or #6 above, +email me and I will correct them in the next release. + + + + + +IEEE Journals + + + +aerospace and military +@STRING{IEEE_J_AES = "{IEEE} Trans. Aerosp. Electron. Syst."} +@STRING{IEEE_J_AIRE = "{IEEE} Trans. Airborne Electron."} +@STRING{IEEE_J_ANE = "{IEEE} Trans. Aerosp. Navig. Electron."} +@STRING{IEEE_J_ANNE = "{IEEE} Trans. Aeronaut. Navig. Electron."} +@STRING{IEEE_J_AP = "{IEEE} Trans. Antennas Propag."} + + + +industrial, commercial and consumer +@STRING{IEEE_J_APPIND = "{IEEE} Trans. Appl. Ind."} +@STRING{IEEE_J_AS = "{IEEE} Trans. Aerosp."} +@STRING{IEEE_J_ASC = "{IEEE} Trans. Appl. Supercond."} + + + +cybernetics, ergonomics, robots, man-machine, and automation +@STRING{IEEE_J_ASE = "{IEEE} Trans. Autom. Sci. Eng."} +@STRING{IEEE_J_ASSP = "{IEEE} Trans. Acoust., Speech, Signal Process."} +@STRING{IEEE_J_AU = "{IEEE} Trans. Audio"} +@STRING{IEEE_J_AUEA = "{IEEE} Trans. Audio Electroacoust."} + + + +electromagnetics, antennas, EMI, magnetics and microwave +@STRING{IEEE_J_AWPL = "{IEEE} Antennas Wireless Propag. Lett."} +Note: The B-ME journal later dropped the hyphen and became the BME. +@STRING{IEEE_J_B-ME = "{IEEE} Trans. Bio-Med. Eng."} +@STRING{IEEE_J_BC = "{IEEE} Trans. Broadcast."} + + + +medical and biological +@STRING{IEEE_J_BME = "{IEEE} Trans. Biomed. Eng."} +@STRING{IEEE_J_BMELC = "{IEEE} Trans. Bio-Med. Electron."} + + + +computers, computation, networking and software +@STRING{IEEE_J_C = "{IEEE} Trans. Comput."} +@STRING{IEEE_J_CAD = "{IEEE} Trans. Comput.-Aided Design Integr. Circuits Syst."} +@STRING{IEEE_J_CAL = "{IEEE} Comput. Archit. Lett."} +@STRING{IEEE_J_CAPT = "{IEEE} Trans. Compon. Packag. Technol."} +@STRING{IEEE_J_CAPTS = "{IEEE} Trans. Compon. Packag. Technol."} +@STRING{IEEE_J_CAS = "{IEEE} Trans. Circuits Syst."} +@STRING{IEEE_J_CASI = "{IEEE} Trans. Circuits Syst. {I}"} +in 2004 CASI and CASII renamed part title to CASI_RP and CASII_EB, respectively. +@STRING{IEEE_J_CASI_RP = "{IEEE} Trans. Circuits Syst. {I}"} +@STRING{IEEE_J_CASII = "{IEEE} Trans. 
Circuits Syst. {II}"} +@STRING{IEEE_J_CASII_EB = "{IEEE} Trans. Circuits Syst. {II}"} +@STRING{IEEE_J_CASVT = "{IEEE} Trans. Circuits Syst. Video Technol."} +disabled till definition is verified +STRING{IEEE_J_BCTV = "{IEEE} Trans. Broadcast Television Receivers"} +@STRING{IEEE_J_CE = "{IEEE} Trans. Consum. Electron."} +@STRING{IEEE_J_CHMT = "{IEEE} Trans. Compon., Hybrids, Manuf. Technol."} +@STRING{IEEE_J_COM = "{IEEE} Trans. Commun."} + + + +communications +@STRING{IEEE_J_COML = "{IEEE} Commun. Lett."} +@STRING{IEEE_J_COMT = "{IEEE} Trans. Commun. Technol."} +@STRING{IEEE_J_CPART = "{IEEE} Trans. Compon. Parts"} +@STRING{IEEE_J_CPMTA = "{IEEE} Trans. Compon., Packag., Manuf. Technol. {A}"} +@STRING{IEEE_J_CPMTB = "{IEEE} Trans. Compon., Packag., Manuf. Technol. {B}"} +@STRING{IEEE_J_CPMTC = "{IEEE} Trans. Compon., Packag., Manuf. Technol. {C}"} +@STRING{IEEE_J_CST = "{IEEE} Trans. Control Syst. Technol."} +@STRING{IEEE_J_CT = "{IEEE} Trans. Circuit Theory"} +@STRING{IEEE_J_DEI = "{IEEE} Trans. Dielectr. Electr. Insul."} + + + +reliability +IEEE seems to want "Mat." here, not "Mater." +@STRING{IEEE_J_DMR = "{IEEE} Trans. Device Mater. Rel."} + + + +energy and power +@STRING{IEEE_J_EC = "{IEEE} Trans. Energy Convers."} +disabled till definition is verified +STRING{IEEE_J_DSC = "{IEEE} Trans. Dependable Secure Comput."} +@STRING{IEEE_J_ECOMP = "{IEEE} Trans. Electron. Comput."} +@STRING{IEEE_J_ED = "{IEEE} Trans. Electron Devices"} + + + +physics, electrons, nanotechnology, nuclear and quantum electronics +@STRING{IEEE_J_EDL = "{IEEE} Electron Device Lett."} +@STRING{IEEE_J_EDU = "{IEEE} Trans. Educ."} +@STRING{IEEE_J_EI = "{IEEE} Trans. Electr. Insul."} +@STRING{IEEE_J_EM = "{IEEE} Trans. Eng. Manag."} +@STRING{IEEE_J_EMC = "{IEEE} Trans. Electromagn. Compat."} +@STRING{IEEE_J_EPM = "{IEEE} Trans. Electron. Packag. Manuf."} + + + +semiconductors, superconductors, electrochemical and solid state +@STRING{IEEE_J_ESSL = "{IEEE/ECS} Electrochem. Solid-State Lett."} +@STRING{IEEE_J_EVC = "{IEEE} Trans. Evol. Comput."} +@STRING{IEEE_J_FUZZ = "{IEEE} Trans. Fuzzy Syst."} + + + +earth, wind, fire and water +@STRING{IEEE_J_GE = "{IEEE} Trans. Geosci. Electron."} +@STRING{IEEE_J_GRS = "{IEEE} Trans. Geosci. Remote Sens."} +@STRING{IEEE_J_GRSL = "{IEEE} Geosci. Remote Sens. Lett."} +@STRING{IEEE_J_HFE = "{IEEE} Trans. Hum. Factors Electron."} +@STRING{IEEE_J_IA = "{IEEE} Trans. Ind. Appl."} +@STRING{IEEE_J_IE = "{IEEE} Trans. Ind. Electron."} +@STRING{IEEE_J_IECI = "{IEEE} Trans. Ind. Electron. Contr. Instrum."} +@STRING{IEEE_J_IFS = "{IEEE} Trans. Inf. Forensics Security"} +@STRING{IEEE_J_IGA = "{IEEE} Trans. Ind. Gen. Appl."} +@STRING{IEEE_J_IINF = "{IEEE} Trans. Ind. Informat."} + + + +instrumentation and measurement +@STRING{IEEE_J_IM = "{IEEE} Trans. Instrum. Meas."} +@STRING{IEEE_J_IP = "{IEEE} Trans. Image Process."} + + + +coding, data, information, knowledge +@STRING{IEEE_J_IT = "{IEEE} Trans. Inf. Theory"} +disabled till definition is verified +STRING{IEEE_J_CBB = "{IEEE/ACM} Trans. Comput. Biology Bioinformatics"} +@STRING{IEEE_J_ITBM = "{IEEE} Trans. Inf. Technol. Biomed."} + + + +autos, transportation and vehicles (non-aerospace) +@STRING{IEEE_J_ITS = "{IEEE} Trans. Intell. Transp. Syst."} + + + +computer graphics, imaging, and multimedia +@STRING{IEEE_J_JDT = "{IEEE/OSA} J. Display Technol."} + + + +insulation and materials +@STRING{IEEE_J_JEM = "{IEEE/TMS} J. Electron. Mater."} +@STRING{IEEE_J_JLT = "J. Lightw. Technol."} +@STRING{IEEE_J_JQE = "{IEEE} J. 
Quantum Electron."} +@STRING{IEEE_J_JRA = "{IEEE} J. Robot. Autom."} +@STRING{IEEE_J_JSAC = "{IEEE} J. Sel. Areas Commun."} +@STRING{IEEE_J_JSSC = "{IEEE} J. Solid-State Circuits"} +@STRING{IEEE_J_JSTQE = "{IEEE} J. Sel. Topics Quantum Electron."} +@STRING{IEEE_J_KDE = "{IEEE} Trans. Knowl. Data Eng."} +@STRING{IEEE_J_MAG = "{IEEE} Trans. Magn."} +@STRING{IEEE_J_MC = "{IEEE} Trans. Mobile Comput."} +@STRING{IEEE_J_ME = "{IEEE} Trans. Med. Electron."} + + + +mechanical +@STRING{IEEE_J_MECH = "{IEEE/ASME} Trans. Mechatronics"} +@STRING{IEEE_J_MEMS = "J. Microelectromech. Syst."} +@STRING{IEEE_J_MFT = "{IEEE} Trans. Manuf. Technol."} +@STRING{IEEE_J_MGWL = "{IEEE} Microw. Guided Wave Lett."} +@STRING{IEEE_J_MI = "{IEEE} Trans. Med. Imag."} +@STRING{IEEE_J_MIL = "{IEEE} Trans. Mil. Electron."} +@STRING{IEEE_J_MM = "{IEEE} Trans. Multimedia"} +@STRING{IEEE_J_MMS = "{IEEE} Trans. Man-Mach. Syst."} +@STRING{IEEE_J_MTT = "{IEEE} Trans. Microw. Theory Tech."} +IEEE seems to want "Compon." here, not "Comp." +@STRING{IEEE_J_MWCL = "{IEEE} Microw. Wireless Compon. Lett."} +@STRING{IEEE_J_NANO = "{IEEE} Trans. Nanotechnol."} +@STRING{IEEE_J_NB = "{IEEE} Trans. Nanobiosci."} +@STRING{IEEE_J_NET = "{IEEE/ACM} Trans. Netw."} +@STRING{IEEE_J_NN = "{IEEE} Trans. Neural Netw."} +@STRING{IEEE_J_NS = "{IEEE} Trans. Nucl. Sci."} +@STRING{IEEE_J_NSRE = "{IEEE} Trans. Neural Syst. Rehabil. Eng."} +@STRING{IEEE_J_OE = "{IEEE} J. Ocean. Eng."} +@STRING{IEEE_J_PAMI = "{IEEE} Trans. Pattern Anal. Mach. Intell."} +disabled till definition is verified +STRING{IEEE_J_EWS = "{IEEE} Trans. Eng. Writing Speech"} +@STRING{IEEE_J_PC = "{IEEE} Trans. Prof. Commun."} +@STRING{IEEE_J_PDS = "{IEEE} Trans. Parallel Distrib. Syst."} +@STRING{IEEE_J_PEL = "{IEEE} Power Electron. Lett."} +@STRING{IEEE_J_PHP = "{IEEE} Trans. Parts, Hybrids, Packag."} +@STRING{IEEE_J_PMP = "{IEEE} Trans. Parts, Mater., Packag."} + + + +education, engineering, history, IEEE, professional +disabled till definition is verified +STRING{IEEE_J_CJECE = "Canadian J. Elect. Comput. Eng."} +@STRING{IEEE_J_PROC = "Proc. {IEEE}"} +@STRING{IEEE_J_PS = "{IEEE} Trans. Plasma Sci."} +@STRING{IEEE_J_PSE = "{IEEE} J. Product Safety Eng."} + + + + optics, lightwave and photonics +@STRING{IEEE_J_PTL = "{IEEE} Photon. Technol. Lett."} +@STRING{IEEE_J_PWRAS = "{IEEE} Trans. Power App. Syst."} +@STRING{IEEE_J_PWRD = "{IEEE} Trans. Power Del."} +@STRING{IEEE_J_PWRE = "{IEEE} Trans. Power Electron."} +@STRING{IEEE_J_PWRS = "{IEEE} Trans. Power Syst."} +@STRING{IEEE_J_R = "{IEEE} Trans. Rel."} +in 1989 JRA became RA +in August 2004, RA split into ASE and RO +@STRING{IEEE_J_RA = "{IEEE} Trans. Robot. Autom."} +@STRING{IEEE_J_RE = "{IEEE} Trans. Rehabil. Eng."} +@STRING{IEEE_J_RFI = "{IEEE} Trans. Radio Freq. Interference"} +@STRING{IEEE_J_RO = "{IEEE} Trans. Robot."} +@STRING{IEEE_J_SAP = "{IEEE} Trans. Speech Audio Process."} +@STRING{IEEE_J_SE = "{IEEE} Trans. Softw. Eng."} + + + +sensors +@STRING{IEEE_J_SENSOR = "{IEEE} Sensors J."} +@STRING{IEEE_J_SM = "{IEEE} Trans. Semicond. Manuf."} +@STRING{IEEE_J_SMC = "{IEEE} Trans. Syst., Man, Cybern."} +@STRING{IEEE_J_SMCA = "{IEEE} Trans. Syst., Man, Cybern. {A}"} +@STRING{IEEE_J_SMCB = "{IEEE} Trans. Syst., Man, Cybern. {B}"} +@STRING{IEEE_J_SMCC = "{IEEE} Trans. Syst., Man, Cybern. {C}"} +@STRING{IEEE_J_SP = "{IEEE} Trans. Signal Process."} + + + +circuits, signals, systems, audio and controls +@STRING{IEEE_J_SPL = "{IEEE} Signal Process. Lett."} +@STRING{IEEE_J_SSC = "{IEEE} Trans. Syst. Sci. 
Cybern."} +@STRING{IEEE_J_SU = "{IEEE} Trans. Sonics Ultrason."} + + + +CAD +@STRING{IEEE_J_TCAD = "{IEEE} J. Technol. Comput. Aided Design"} +@STRING{IEEE_J_TJMJ = "{IEEE} Transl. J. Magn. Jpn."} +@STRING{IEEE_J_UE = "{IEEE} Trans. Ultrason. Eng."} +@STRING{IEEE_J_UFFC = "{IEEE} Trans. Ultrason., Ferroelectr., Freq. Control"} +@STRING{IEEE_J_VC = "{IEEE} Trans. Veh. Commun."} +@STRING{IEEE_J_VCG = "{IEEE} Trans. Vis. Comput. Graphics"} + + + +VLSI +@STRING{IEEE_J_VLSI = "{IEEE} Trans. {VLSI} Syst."} +@STRING{IEEE_J_VT = "{IEEE} Trans. Veh. Technol."} +@STRING{IEEE_J_WCOM = "{IEEE} Trans. Wireless Commun."} + + + + + + +IEEE Magazines + + + +@STRING{IEEE_M_AES = "{IEEE} Aerosp. Electron. Syst. Mag."} +@STRING{IEEE_M_AP = "{IEEE} Antennas Propag. Mag."} +@STRING{IEEE_M_ASSP = "{IEEE} {ASSP} Mag."} +@STRING{IEEE_M_C = "{IEEE} Computer"} +@STRING{IEEE_M_CAP = "{IEEE} Comput. Appl. Power"} +@STRING{IEEE_M_CAS = "{IEEE} Circuits Syst. Mag."} +@STRING{IEEE_M_CD = "{IEEE} Circuits Devices Mag."} +@STRING{IEEE_M_CGA = "{IEEE} Comput. Graph. Appl."} +@STRING{IEEE_M_CIM = "{IEEE} Comput. Intell. Mag."} +@STRING{IEEE_M_COM = "{IEEE} Commun. Mag."} +@STRING{IEEE_M_COMSOC = "{IEEE} Commun. Soc. Mag."} +@STRING{IEEE_M_CONC = "{IEEE} Concurrency"} +@STRING{IEEE_M_CS = "{IEEE} Control Syst. Mag."} +CSEM changed to CSE in 1999 +@STRING{IEEE_M_CSE = "{IEEE} Comput. Sci. Eng."} +@STRING{IEEE_M_CSEM = "{IEEE} Comput. Sci. Eng. Mag."} +@STRING{IEEE_M_DTC = "{IEEE} Des. Test. Comput."} +@STRING{IEEE_M_EI = "{IEEE} Electr. Insul. Mag."} +@STRING{IEEE_M_EMB = "{IEEE} Eng. Med. Biol. Mag."} +@STRING{IEEE_M_EMR = "{IEEE} Eng. Manag. Rev."} +@STRING{IEEE_M_ETR = "{IEEE} ElectroTechnol. Rev."} +@STRING{IEEE_M_EXP = "{IEEE} Expert"} +@STRING{IEEE_M_HIST = "{IEEE} Ann. Hist. Comput."} +@STRING{IEEE_M_IA = "{IEEE} Ind. Appl. Mag."} +@STRING{IEEE_M_IC = "{IEEE} Internet Comput."} +@STRING{IEEE_M_IM = "{IEEE} Instrum. Meas. Mag."} +@STRING{IEEE_M_IS = "{IEEE} Intell. Syst."} +@STRING{IEEE_M_ITP = "{IEEE} {IT} Prof."} +@STRING{IEEE_M_MICRO = "{IEEE} Micro"} +@STRING{IEEE_M_MM = "{IEEE} Multimedia"} +@STRING{IEEE_M_MW = "{IEEE} Microw. Mag."} +@STRING{IEEE_M_NET = "{IEEE} Netw."} +IEEE's editorial manual lists "Pers. Commun.", +but "Personal Commun. Mag." seems to be what is used in the journals +@STRING{IEEE_M_PCOM = "{IEEE} Personal Commun. Mag."} +CAP and PER merged to form PE in 2003 +@STRING{IEEE_M_PE = "{IEEE} Power Energy Mag."} +@STRING{IEEE_M_PER = "{IEEE} Power Eng. Rev."} +@STRING{IEEE_M_POT = "{IEEE} Potentials"} +@STRING{IEEE_M_PVC = "{IEEE} Pervasive Comput."} +@STRING{IEEE_M_RA = "{IEEE} Robot. Autom. Mag."} +@STRING{IEEE_M_S = "{IEEE} Softw."} +@STRING{IEEE_M_SAP = "{IEEE} Security Privacy"} +@STRING{IEEE_M_SP = "{IEEE} Signal Process. Mag."} +@STRING{IEEE_M_SPECT = "{IEEE} Spectr."} +@STRING{IEEE_M_TODAY = "Today's Engineer"} +@STRING{IEEE_M_TS = "{IEEE} Technol. Soc. Mag."} +@STRING{IEEE_M_VT = "{IEEE} Veh. Technol. Mag."} +@STRING{IEEE_M_WC = "{IEEE} Wireless Commun. Mag."} + + + + + + +IEEE Online Publications + + + +@STRING{IEEE_O_CSTO = "{IEEE} Commun. Surveys Tuts."} + +@InProceedings{GEB+2017, + author = {Gatys, Leon A. and Ecker, Alexander S. and Bethge, Matthias and Hertzmann, Aaron and Shechtman, Eli}, + title = {Controlling Perceptual Factors in Neural Style Transfer}, + booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})}, + year = {2017}, + abstract = {Neural Style Transfer has shown very exciting results enabling new forms of image manipulation. 
Here we extend the existing method to introduce control over spatial location, colour information and across spatial scale. We demonstrate how this enhances the method by allowing high-resolution controlled stylisation and helps to alleviate common failure cases such as applying ground textures to sky regions. Furthermore, by decomposing style into these perceptual factors we enable the combination of style information from multiple sources to generate new, perceptually appealing styles from existing ones. We also describe how these methods can be used to more efficiently produce large size, high-quality stylisation. Finally we show how the introduced control measures can be applied in recent methods for Fast Neural Style Transfer.}, + archiveprefix = {arXiv}, + doi = {10.1109/CVPR.2017.397}, + eprint = {1611.07865}, +} + +@InProceedings{GEB2015, + author = {Gatys, Leon A. and Ecker, Alexander S. and Bethge, Matthias}, + title = {Texture Synthesis Using Convolutional Neural Networks}, + booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, + year = {2015}, + abstract = {Here we introduce a new model of natural textures based on the feature spaces of convolutional neural networks optimised for object recognition. Samples from the model are of high perceptual quality demonstrating the generative power of neural networks trained in a purely discriminative fashion. Within the model, textures are represented by the correlations between feature maps in several layers of the network. We show that across layers the texture representations increasingly capture the statistical properties of natural images while making object information more and more explicit. The model provides a new tool to generate stimuli for neuroscience and might offer insights into the deep representations learned by convolutional neural networks.}, + url = {http://papers.nips.cc/paper/5633-texture-synthesis-using-convolutional-neural-networks.pdf}, +} + +@InProceedings{GEB2016, + author = {Gatys, Leon A. and Ecker, Alexander. S. and Bethge, Matthias}, + title = {Image Style Transfer Using Convolutional Neural Networks}, + booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})}, + year = {2016}, + abstract = {Rendering the semantic content of an image in different styles is a difficult image processing task. Arguably, a major limiting factor for previous approaches has been the lack of image representations that explicitly represent semantic information and, thus, allow to separate image content from style. Here we use image representations derived from Convolutional Neural Networks optimised for object recognition, which make high level image information explicit. We introduce A Neural Algorithm of Artistic Style that can separate and recombine the image content and style of natural images. The algorithm allows us to produce new images of high perceptual quality that combine the content of an arbitrary photograph with the appearance of numerous wellknown artworks. 
Our results provide new insights into the deep image representations learned by Convolutional Neural Networks and demonstrate their potential for high level image synthesis and manipulation.}, + archiveprefix = {arXiv}, + doi = {10.1109/CVPR.2016.265}, + eprint = {1508.06576}, +} + +@InProceedings{JAL2016, + author = {Johnson, Justin and Alahi, Alexandre and Li, Fei-Fei}, + title = {Perceptual Losses for Real-Time Style Transfer and Super-Resolution}, + booktitle = {European Conference on Computer Vision ({ECCV})}, + year = {2016}, + archiveprefix = {arXiv}, + doi = {10.1007/978-3-319-46475-6_43}, + eprint = {1603.08155}, +} + +@Article{JYF+2019, + author = {Jing, Yongcheng and Yang, Yezhou and Feng, Zunlei and Ye, Jingwen and Yu, Yizhou and Song, Mingli}, + title = {Neural Style Transfer: A Review}, + journal = {{IEEE} Transactions on Visualization and Computer Graphics}, + year = {2019}, + abstract = {The seminal work of Gatys et al. demonstrated the power of Convolutional Neural Networks (CNNs) in creating artistic imagery by separating and recombining image content and style. This process of using CNNs to render a content image in different styles is referred to as Neural Style Transfer (NST). Since then, NST has become a trending topic both in academic literature and industrial applications. It is receiving increasing attention and a variety of approaches are proposed to either improve or extend the original NST algorithm. In this paper, we aim to provide a comprehensive overview of the current progress towards NST. We first propose a taxonomy of current algorithms in the field of NST. Then, we present several evaluation methods and compare different NST algorithms both qualitatively and quantitatively. The review concludes with a discussion of various applications of NST and open problems for future research. 
A list of papers discussed in this review, corresponding codes, pre-trained models and more comparison results are publicly available at: https://osf.io/f8tu4/.}, + archiveprefix = {arXiv}, + doi = {10.1109/TVCG.2019.2921336}, + eprint = {1705.04058}, +} + +@InProceedings{LW2016, + author = {Li, Chuan and Wand, Michael}, + title = {Combining Markov Random Fields and Convolutional Neural Networks for Image Synthesis}, + booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})}, + year = {2016}, + archiveprefix = {arXiv}, + doi = {10.1109/CVPR.2016.272}, + eprint = {1601.04589}, +} + +@InProceedings{PGM+2019, + author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith}, + title = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library}, + booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, + year = {2019}, + url = {http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf}, +} + +@InProceedings{SKLO2018, + author = {Sanakoyeu, Artsiom and Kotovenko, Dmytro and Lang, Sabine and Ommer, Bjorn}, + title = {A Style-Aware Content Loss for Real-Time {HD} Style Transfer}, + booktitle = {Proceedings of the European Conference on Computer Vision ({ECCV})}, + year = {2018}, + archiveprefix = {arXiv}, + doi = {10.1007/978-3-030-01237-3_43}, + eprint = {1807.10201}, +} + +@InProceedings{ULVL2016, + author = {Ulyanov, Dmitry and Lebedev, Vadim and Vedaldi, Andrea and Lempitsky, Viktor S.}, + title = {Texture Networks: Feed-forward Synthesis of Textures and Stylized Images}, + booktitle = {International Conference on Machine Learning ({ICML})}, + year = {2016}, + archiveprefix = {arXiv}, + eprint = {1603.03417}, + url = {http://proceedings.mlr.press/v48/ulyanov16.pdf}, +} + +@InProceedings{UVL2017, + author = {Ulyanov, Dmitry and Vedaldi, Andrea and Lempitsky, Viktor S.}, + title = {Improved Texture Networks: Maximizing Quality and Diversity in Feed-Forward Stylization and Texture Synthesis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})}, + year = {2017}, + archiveprefix = {arXiv}, + doi = {10.1109/CVPR.2017.437}, + eprint = {1701.02096}, +} + +@InProceedings{ZF2014, + author = {Zeiler, Matthew D. 
and Fergus, Rob}, + title = {Visualizing and Understanding Convolutional Networks}, + booktitle = {European Conference on Computer Vision ({ECCV})}, + year = {2014}, + archiveprefix = {arXiv}, + doi = {10.1007/978-3-319-10590-1_53}, + eprint = {1311.2901}, +} + +@InProceedings{ML2019, + author = {Meier, Philip and Lohweg, Volker}, + title = {Content Representation for Neural Style Transfer Algorithms based on Structural Similarity}, + booktitle = {Proceedings of the Workshop Computational Intelligence}, + year = {2019}, +} + +@Article{2018, + title = {Is artificial intelligence set to become art’s next medium?}, + journal = {Christie's Inc.}, + year = {2018}, + howpublished = {Online, accessed 08.03.2021}, + url = {https://www.christies.com/about-us/contact/terms-and-conditions-for-website-use}, +} + +@Article{ML2020, + author = {Meier, Philip and Lohweg, Volker}, + title = {pystiche: A Framework for Neural Style Transfer}, + journal = {Journal of Open Source Software {JOSS}}, + year = {2020}, + doi = {10.21105/joss.02761}, +} + +@Article{BPRS2018, + author = {Baydin, Atilim Gunes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark}, + title = {Automatic differentiation in machine learning: a survey}, + journal = {Journal of machine learning research}, + year = {2018}, + volume = {18}, + publisher = {Journal of Machine Learning Research}, +} + +@PhdThesis{Zho2006, + author = {Zhou, Dongxiao}, + title = {Texture analysis and synthesis using a generic Markov-Gibbs image model}, + school = {University of Auckland}, + year = {2006}, + url = {https://www.cs.auckland.ac.nz/~georgy/research/texture/thesis-html/thesis.html}, +} + +@Article{CDH+2018, + author = {Camerer, Colin F and Dreber, Anna and Holzmeister, Felix and Ho, Teck-Hua and Huber, Jürgen and Johannesson, Magnus and Kirchler, Michael and Nave, Gideon and Nosek, Brian A and Pfeiffer, Thomas and Altmejd, Adam and Buttrick, Nick and Chan, Taizan and Chen, Yiling and Forsell, Eskil and Gampa, Anup and Heikensten, Emma and Hummer, Lily and Imai, Taisuke and Isaksson, Siri and Manfredi, Dylan and Rose, Julia and Wagenmakers, Eric-Jan and Wu, Hang}, + title = {{Evaluating the replicability of social science experiments in Nature and Science between 2010 and 2015}}, + journal = {Nature Human Behaviour}, + year = {2018}, + volume = {2}, + number = {9}, + pages = {637--644}, + abstract = {{Being able to replicate scientific findings is crucial for scientific progress1–15. We replicate 21 systematically selected experimental studies in the social sciences published in Nature and Science between 2010 and 201516–36. The replications follow analysis plans reviewed by the original authors and pre-registered prior to the replications. The replications are high powered, with sample sizes on average about five times higher than in the original studies. We find a significant effect in the same direction as the original study for 13 (62\%) studies, and the effect size of the replications is on average about 50\% of the original effect size. Replicability varies between 12 (57\%) and 14 (67\%) studies for complementary replicability indicators. Consistent with these results, the estimated true-positive rate is 67\% in a Bayesian analysis. The relative effect size of true positives is estimated to be 71\%, suggesting that both false positives and inflated effect sizes of true positives contribute to imperfect reproducibility. 
Furthermore, we find that peer beliefs of replicability are strongly related to replicability, suggesting that the research community could predict which results would replicate and that failures to replicate were not the result of chance alone. Camerer et al. carried out replications of 21 Science and Nature social science experiments, successfully replicating 13 out of 21 (62\%). Effect sizes of replications were about half of the size of the originals.}}, + doi = {10.1038/s41562-018-0399-z}, +} + +@Article{AAA+2015, + author = {Aarts, Alexander and Anderson, Joanna and Anderson, Christopher and Attridge, Peter and Attwood, Angela and Axt, Jordan and Babel, Molly and Bahník, Štěpán and Baranski, Erica and Barnett-Cowan, Michael and Bartmess, Elizabeth and Beer, Jennifer and Bell, Raoul and Bentley, Heather and Beyan, Leah and Binion, Grace and Borsboom, Denny and Bosch, Annick and Bosco, Frank and Penuliar, Mike}, + title = {Estimating the reproducibility of psychological science}, + journal = {Science}, + year = {2015}, + volume = {349}, + month = {08}, + doi = {10.1126/science.aac4716}, +} + +@Article{Raf2020, + author = {Raff, Edward}, + title = {Quantifying Independently Reproducible Machine Learning}, + journal = {The Gradient}, + year = {2020}, + howpublished = {Online, accessed 20.04.2021}, + url = {https://thegradient.pub/independently-reproducible-machine-learning/}, +} + +@Article{Hut2018, + author = {Hutson, Matthew}, + title = {Artificial intelligence faces reproducibility crisis}, + journal = {Science (New York, N.Y.)}, + year = {2018}, + doi = {10.1126/science.359.6377.725}, +} + +@Article{Bak2016, + author = {Baker, Monya}, + title = {1,500 scientists lift the lid on reproducibility}, + journal = {Springer Nature}, + year = {2016}, + howpublished = {Online, accessed 20.04.2021}, + url = {https://www.nature.com/news/1-500-scientists-lift-the-lid-on-reproducibility-1.19970}, +} + +@Article{PVS+2020, + author = {Pineau, Joelle and Vincent-Lamarre, Philippe and Sinha, Koustuv and Larivi{\`e}re, Vincent and Beygelzimer, Alina and d'Alch{\'e}-Buc, Florence and Fox, Emily and Larochelle, Hugo}, + title = {Improving reproducibility in machine learning research (a report from the neurips 2019 reproducibility program)}, + journal = {arXiv preprint arXiv:2003.12206}, + year = {2020}, +} + +@Article{Ben2020, + author = {Bengio, Yoshua}, + title = {Time to rethink the publication process in machine learning}, + journal = {Yoshua Bengio’s blog}, + year = {2020}, + howpublished = {Online, accessed 26.04.2021}, + url = {https://yoshuabengio.org/2020/02/26/time-to-rethink-the-publication-process-in-machine-learning/}, +} + +@Article{Cha2019, + author = {Charrez, Diego}, + title = {NeurIPS 2019 Stats}, + journal = {Medium}, + year = {2019}, + howpublished = {Online, accessed 26.04.2021}, + url = {https://medium.com/@dcharrezt/neurips-2019-stats-c91346d31c8f}, +} + +@Article{CKK+2015, + author = {Cacioppo, John T and Kaplan, Robert M and Krosnick, Jon A and Olds, James L and Dean, Heather}, + title = {Social, behavioral, and economic sciences perspectives on robust and reliable science}, + journal = {Report of the Subcommittee on Replicability in Science Advisory Committee to the National Science Foundation Directorate for Social, Behavioral, and Economic Sciences}, + year = {2015}, +} + +@Misc{CheckList2020, + title = {The Machine Learning Reproducibility Checklist (v2.0, Apr.7 2020)}, + howpublished = {Online, accessed 26.04.2021}, + url = 
{https://www.cs.mcgill.ca/~jpineau/ReproducibilityChecklist.pdf}, +} + +@Article{Pet2018, + author = {Warden, Pete}, + title = {The Machine Learning Reproducibility Crisis}, + journal = {Pete Warden's blog}, + year = {2018}, + howpublished = {Online, accessed 26.04.2021}, + url = {https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/}, +} + +@InProceedings{SZ2015, + author = {Karen Simonyan and Andrew Zisserman}, + title = {Very Deep Convolutional Networks for Large-Scale Image Recognition}, + booktitle = {International Conference on Learning Representations}, + year = {2015}, +} + +@InProceedings{LMB+2014, + author = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + title = {Microsoft {COCO}: Common Objects in Context}, + booktitle = {European Conference on Computer Vision ({ECCV})}, + year = {2014}, + pages = {740--755}, + organization = {Springer}, +} + +@Article{RDS+2015, + author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, + title = {{ImageNet Large Scale Visual Recognition Challenge}}, + journal = {International Journal of Computer Vision (IJCV)}, + year = {2015}, + volume = {115}, + number = {3}, + pages = {211--252}, + doi = {10.1007/s11263-015-0816-y}, +} + +@Article{ZLX+2014, + author = {Zhou, Bolei and Lapedriza, Agata and Xiao, Jianxiong and Torralba, Antonio and Oliva, Aude}, + title = {Learning deep features for scene recognition using places database}, + year = {2014}, + publisher = {Neural Information Processing Systems Foundation}, +} + +@InProceedings{He2016, + author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})}, + title = {Deep residual learning for image recognition}, + year = {2016}, +} + +@InProceedings{EL1999, + author = {Efros, Alexei A. and Leung, Thomas K.}, + title = {Texture synthesis by non-parametric sampling}, + booktitle = {Proceedings of the 7\textsuperscript{th} IEEE International Conference on Computer Vision (ICCV)}, + year = {1999}, + abstract = {A non-parametric method for texture synthesis is proposed. The texture synthesis process grows a new image outward from an initial seed, one pixel at a time. A Markov random field model is assumed, and the conditional distribution of a pixel given all its neighbors synthesized so far is estimated by querying the sample image and finding all similar neighborhoods. The degree of randomness is controlled by a single perceptually intuitive parameter. The method aims at preserving as much local structure as possible and produces good results for a wide variety of synthetic and real-world textures.}, + doi = {10.1109/ICCV.1999.790383}, + timestamp = {2019-11-11}, +} + +@Article{PS2000, + author = {Portilla, Javier and Simoncelli, Eero P.}, + title = {A Parametric Texture Model Based on Joint Statistics of Complex Wavelet Coefficients}, + journal = {International Journal of Computer Vision (IJCV)}, + year = {2000}, + volume = {40}, + abstract = {We present a universal statistical model for texture images in the context of an overcomplete complex wavelet transform.
The model is parameterized by a set of statistics computed on pairs of coefficients corresponding to basis functions at adjacent spatial locations, orientations, and scales. We develop an efficient algorithm for synthesizing random images subject to these constraints, by iteratively projecting onto the set of images satisfying each constraint, and we use this to test the perceptual validity of the model. In particular, we demonstrate the necessity of subgroups of the parameter set by showing examples of texture synthesis that fail when those parameters are removed from the set. We also demonstrate the power of our model by successfully synthesizing examples drawn from a diverse collection of artificial and natural textures.}, + day = {01}, + doi = {10.1023/A:1026553619983}, + timestamp = {2019-11-12}, +} + +@Book{Glas2021, + author = {Glassner, A.}, + publisher = {No Starch Press}, + title = {Deep Learning: A Visual Approach}, + year = {2021}, + isbn = {9781718500730}, +} + +@Book{Alp2020, + author = {Alpaydin, E.}, + publisher = {MIT Press}, + title = {Introduction to Machine Learning, fourth edition}, + year = {2020}, + isbn = {9780262043793}, + series = {Adaptive Computation and Machine Learning series}, + lccn = {2019028373}, + url = {https://books.google.de/books?id=tZnSDwAAQBAJ}, +} + +@Article{Ioa2021, + author = {Ioannou, Lefteris}, + journal = {Medium}, + title = {Neural Style Transfer: Past, present, future}, + year = {2021}, + howpublished = {Online, accessed 11.02.2022}, + url = {https://jrrlefteris6.medium.com/neural-style-transfer-past-present-future-b951977f0dee}, +} + +@Article{Kel2018, + author = {Kelly, Christopher}, + journal = {Medium}, + title = {Real-Time Style Transfer for iOS— Transform your photos and videos into masterpieces}, + year = {2018}, + howpublished = {Online, accessed 11.02.2022}, + url = {https://medium.com/@ghop02/real-time-style-transfer-for-ios-transform-your-photos-and-videos-into-masterpieces-f04111fcd2ff}, +} + +@Comment{jabref-meta: databaseType:bibtex;} + +disabled till definition is verified +STRING{IEEE_O_DSO = "{IEEE} Distrib. Syst. 
Online"} + + + + + +-- +EOF diff --git a/rescience_paper/content.tex b/rescience_paper/content.tex new file mode 100644 index 00000000..1cb53f4c --- /dev/null +++ b/rescience_paper/content.tex @@ -0,0 +1,9 @@ + +\input{sections/introduction.tex} +\input{sections/preliminaries.tex} +\input{sections/replicated_Paper.tex} +\input{sections/methology.tex} +\input{sections/results.tex} +\input{sections/discussion.tex} +\input{sections/acknowledgements.tex} +\input{sections/appendix.tex} diff --git a/rescience_paper/download.py b/rescience_paper/download.py new file mode 100644 index 00000000..e129e600 --- /dev/null +++ b/rescience_paper/download.py @@ -0,0 +1,66 @@ +import pathlib +import shutil +import tempfile +from typing import Collection + +from torchvision.datasets.utils import download_and_extract_archive + +HERE = pathlib.Path(__file__).parent + + +def main(): + tmp_dir = pathlib.Path(tempfile.mkdtemp()) + download_template(tmp_dir) + download_images(tmp_dir) + insert_preamble() + + +def download_template( + tmp_dir: pathlib.Path, + *, + commit_hash: str = "75bf717a6b07f0f95e197e7cb8cbdbeee8bb564b", + executable: Collection[str] = ( + "yaml-to-latex.py", + ), + excluded: Collection[str] = ( + "bibliography.bib", + ".gitignore", + "content.tex", + "metadata.yaml", + "README.md", + ), +) -> None: + download_and_extract_archive( + f"https://github.com/ReScience/template/archive/{commit_hash}.zip", + str(tmp_dir), + ) + + template_dir = tmp_dir / f"template-{commit_hash}" + for file_or_dir in template_dir.glob("*"): + if file_or_dir.name not in excluded: + if file_or_dir.name in executable: + file_or_dir.chmod(file_or_dir.stat().st_mode | 0o100111) + shutil.move(str(file_or_dir), str(HERE)) + + +def download_images( + tmp_dir: pathlib.Path, + *, + url="https://download.pystiche.org/replication-paper/images.tar.gz", +) -> None: + download_and_extract_archive(url, str(tmp_dir), extract_root=str(HERE / "graphics")) + + +def insert_preamble(): + with open(HERE / "article.tex", "r+") as file: + lines = file.readlines() + + idx = next(idx for idx, line in enumerate(lines) if line.strip() == r"\begin{document}") + lines.insert(idx, "\\input{preamble.tex}\n\n") + + file.seek(0) + file.writelines(lines) + + +if __name__ == "__main__": + main() diff --git a/rescience_paper/graphics/.gitignore b/rescience_paper/graphics/.gitignore new file mode 100644 index 00000000..187d3623 --- /dev/null +++ b/rescience_paper/graphics/.gitignore @@ -0,0 +1 @@ +/images diff --git a/rescience_paper/graphics/nst/example_images.tex b/rescience_paper/graphics/nst/example_images.tex new file mode 100644 index 00000000..4d125a4e --- /dev/null +++ b/rescience_paper/graphics/nst/example_images.tex @@ -0,0 +1,34 @@ +\documentclass[class=journal]{standalone} + +\usepackage{physics} +\usepackage{amsmath} +\usepackage{tikz} +\usepackage{mathdots} +\usepackage{yhmath} +\usepackage{cancel} +\usepackage{color} +\usepackage{siunitx} +\usepackage{array} +\usepackage{multirow} +\usepackage{amssymb} +\usepackage{gensymb} +\usepackage{tabularx} +\usepackage{booktabs} +\usetikzlibrary{fadings} +\usetikzlibrary{patterns} +\usetikzlibrary{shadows.blur} +\usetikzlibrary{shapes} + + +\begin{document} +\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt +\begin{tikzpicture}[x=0.7pt,y=0.7pt,yscale=-1,xscale=1] + \draw (288.25,67.5) node {\includegraphics[width=129.38pt,height=89.25pt]{images/nst/source/mosaic_stones.jpg}}; + \draw (115.75,67.5) node 
{\includegraphics[width=129.38pt,height=89.25pt]{images/nst/source/colorful_bird.jpg}}; + \draw (460.75,67.5) node {\includegraphics[width=129.38pt,height=89.25pt]{images/nst/results/nst_IST_paper_bird__mosaic__full.jpg}}; + \draw [line width=2.25, color=white] (193,69.2) -- (223, 69.2) ; + \draw [line width=2.25, color=white] (208,84.2) -- (208,54.2) ; + \draw [line width=3, color=white] (355,75.7) -- (383,75.7) ; + \draw [line width=3, color=white] (355,62.7) -- (383,62.7) ; +\end{tikzpicture} +\end{document} diff --git a/rescience_paper/metadata.yaml b/rescience_paper/metadata.yaml new file mode 100644 index 00000000..dd808a2f --- /dev/null +++ b/rescience_paper/metadata.yaml @@ -0,0 +1,130 @@ +# To be filled by the author(s) at the time of submission +# ------------------------------------------------------- + +# Title of the article: +# - For a successful replication, it should be prefixed with "[Re]" +# - For a failed replication, it should be prefixed with "[¬Re]" +# - For other article types, no instruction (but please, not too long) +title: "[Re] Neural Style Transfer: A Replication Study" + +# List of authors with name, orcid number, email and affiliation +# Affiliation "*" means contact author (required even for single-authored papers) +authors: + - name: Julian Bültemeier + orcid: 0000-0001-8595-728X + email: julian.bueltemeier@th-owl.de + affiliations: "1,*" + + - name: Philip Meier + orcid: 0000-0002-5184-1622 + email: pmeier@quansight.com + affiliations: 2 + + - name: Christoph-Alexander Holst + orcid: 0000-0002-6253-7036 + email: christoph-alexander.holst@th-owl.de + affiliations: 1 + + - name: Volker Lohweg + orcid: 0000-0002-3325-7887 + email: volker.lohweg@th-owl.de + affiliations: 1 + + + +# List of affiliations with code (corresponding to author affiliations), name +# and address. You can also use these affiliations to add text such as "Equal +# contributions" as name (with no address). +affiliations: + - code: 1 + name: inIT – Institute Industrial IT + address: Lemgo, Germany + + - code: 2 + name: Quansight + address: Austin Texas, United States + + + + +# List of keywords (adding the programming language might be a good idea) +keywords: python, pyTorch, pystiche, Neural Style Transfer (NST), Convolutional Neural Network (CNN) + +# Code URL and DOI/SWH (url is mandatory for replication, doi after acceptance) +# You can get a DOI for your code from Zenodo, or an SWH identifier from +# Software Heritage. +# see https://guides.github.com/activities/citable-code/ +code: + - url: https://github.com/pystiche/papers + - doi: + - swh: + +# Data URL and DOI (optional if no data) +data: + - url: + - doi: + +# Information about the original article that has been replicated +replication: + - cite: # Full textual citation + - bib: # Bibtex key (if any) in your bibliography file + - url: # URL to the PDF, try to link to a non-paywall version + - doi: # Regular digital object identifier + +# Don't forget to surround abstract with double quotes +abstract: "The manipulation of digital images or videos to match the look or visual style of another image comes from the field of Non-Photorealistic Rendering and started back around 1990. However, in recent years, a new class of software algorithms called Neural Style Transfer has emerged in this field with impressive results. Therefore, Neural Style Transfer has been a trending topic in academic research and image processing applications since its introduction in 2016. 
As a result, many different approaches have been published in this field in a short time. The problem of this rapid development with many publications is the lack of standardisation and the occurrence of bugs that lead to undocumented behaviour. This makes replication of the results difficult or even impossible. For this reason, a replication study of seven papers in the field of Neural Style Transfer has been conducted to highlight these shortcomings." + +# Bibliography file (yours) +bibliography: bibliography.bib + +# Type of the article +# Type can be: +# * Editorial +# * Letter +# * Replication +type: Replication + +# Scientific domain of the article (e.g. Computational Neuroscience) +# (one domain only & try to be not overly specific) +domain: Machine Learning (ML) + +# Coding language (main one only if several) +language: python + + +# To be filled by the author(s) after acceptance +# ----------------------------------------------------------------------------- + +# For example, the URL of the GitHub issue where review actually occured +review: + - url: + +contributors: + - name: + orcid: + role: editor + - name: + orcid: + role: reviewer + - name: + orcid: + role: reviewer + +# This information will be provided by the editor +dates: + - received: November 1, 2018 + - accepted: + - published: + +# This information will be provided by the editor +article: + - number: # Article number will be automatically assigned during publication + - doi: # DOI from Zenodo + - url: # Final PDF URL (Zenodo or rescience website?) + +# This information will be provided by the editor +journal: + - name: "ReScience C" + - issn: 2430-3658 + - volume: 4 + - issue: 1 diff --git a/rescience_paper/nst_images_script.py b/rescience_paper/nst_images_script.py new file mode 100644 index 00000000..b71a9751 --- /dev/null +++ b/rescience_paper/nst_images_script.py @@ -0,0 +1,284 @@ +import os +from typing import Sequence, Tuple, cast +from argparse import Namespace +from os import path + +from torch import nn +from pystiche import misc, optim, loss, enc, ops, image, data + + +def compute_layer_weights( + layers: Sequence[str], multi_layer_encoder: enc.MultiLayerEncoder +) -> Tuple[float, ...]: + def find_out_channels(multi_layer_encoder: nn.Module, layer: str) -> int: + modules = multi_layer_encoder._modules + layers = list(modules.keys()) + layers = reversed(layers[: layers.index(layer) + 1]) + for layer_ in layers: + try: + return cast(int, modules[layer_].out_channels) + except AttributeError: + pass + + raise RuntimeError( + f"Neither '{layer}' nor any previous layer has an 'out_channels' " + f"attribute." 
+ ) + + num_channels = [] + for layer in layers: + if layer not in multi_layer_encoder: + raise ValueError(f"Layer {layer} is not part of the multi_layer_encoder.") + + num_channels.append(find_out_channels(multi_layer_encoder, layer)) + + return tuple(1.0 / n**2.0 for n in num_channels) + + +def nst_images(): + images_ = { + "bird": data.DownloadableImage( + "https://free-images.com/md/b9c1/colorful_bird_rainbow_parakeet.jpg", + license=data.PublicDomainLicense(), + md5="3eab0e8a32e020b40536154acdc05ab4", + file="colorful_bird.jpg", + ), + "mosaic": data.DownloadableImage( + "https://free-images.com/md/ab85/mosaic_stones_structure_pattern.jpg", + license=data.PublicDomainLicense(), + md5="afa9e5024aff029753a6901cdc19bedc", + file="mosaic_stones.jpg", + ), + "starry_night": data.DownloadableImage( + "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg/1280px-Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg", + title="Starry Night", + author="Vincent van Gogh", + date="ca. 1889", + license=data.ExpiredCopyrightLicense(1890), + md5="372e5bc438e3e8d0eb52cc6f7ef44760", + ), + } + return data.DownloadableImageCollection(images_) + + +def nst( + args, + content_image, + style_image, + output_name, + content_layer="relu4_2", + content_weight=1e0, + style_layers=("relu1_1", "relu2_1", "relu3_1", "relu4_1"), + style_weight=1e3, + starting_point="content", + num_steps=500, +): + multi_layer_encoder = enc.vgg19_multi_layer_encoder( + weights="caffe", internal_preprocessing=True, allow_inplace=True + ) + + # content loss + content_encoder = multi_layer_encoder.extract_encoder(content_layer) + content_loss = ops.FeatureReconstructionOperator( + content_encoder, score_weight=content_weight + ) + + # style loss + layer_weights = compute_layer_weights(style_layers, multi_layer_encoder) + + def get_style_op(encoder, layer_weight): + return ops.GramOperator(encoder, score_weight=layer_weight) + + style_loss = ops.MultiLayerEncodingOperator( + multi_layer_encoder, + style_layers, + get_style_op, + score_weight=style_weight, + layer_weights=layer_weights, + ) + + perceptual_loss = loss.PerceptualLoss(content_loss, style_loss).to(args.device) + + images = nst_images() + images.download(args.image_source_dir) + perceptual_loss.set_content_image(content_image) + perceptual_loss.set_style_image(style_image) + input_image = misc.get_input_image(starting_point, content_image=content_image) + + output_image = optim.image_optimization( + input_image, perceptual_loss, num_steps=num_steps + ) + + output_file = path.join(args.image_results_dir, output_name) + image.write_image(output_image, output_file) + + +def full_nst(args, content, style, size=500): + images = nst_images() + images.download(args.image_source_dir) + content_image = images[content].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + name = f"nst_IST_paper_{content}__{style}__full.jpg" + nst(args, content_image, style_image, name) + + +def layer_nst(args, content, style, size=500): + images = nst_images() + images.download(args.image_source_dir) + + content_image = images[content].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + + # style loss + style_layers = [ + "relu1_1", + "relu2_2", + "relu3_1", + "relu4_1", + ] + + for layer in style_layers: + name = f"nst_IST_paper_{content}__{style}__{layer}.jpg" + nst( + args, + content_image, + style_image, + name, + style_layers=[layer], + 
style_weight=1e3, + starting_point="content", + ) + + +def init_nst(args, content, style, size=500): + images = nst_images() + images.download(args.image_source_dir) + + content_image = images[content].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + + for starting_point in ["random", "content"]: + name = f"nst_IST_paper_{content}__{style}__start_{starting_point}.jpg" + nst( + args, + content_image, + style_image, + name, + style_weight=1e1, + num_steps=1000, + starting_point=starting_point, + ) + + +def iteration_nst(args, content, style, size=500): + images = nst_images() + images.download(args.image_source_dir) + + content_image = images[content].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + + for num_steps in [100, 500, 1000]: + name = f"nst_IST_paper_{content}__{style}__iteration_{num_steps}.jpg" + nst( + args, + content_image, + style_image, + name, + num_steps=num_steps, + starting_point="random", + style_weight=1e1, + ) + + +def image_size_nst(args, content, style): + images = nst_images() + images.download(args.image_source_dir) + for image_size in [256, 512]: + content_image = images[content].read(size=image_size, device=args.device) + style_image = images[style].read(size=image_size, device=args.device) + + name = f"nst_IST_paper_{content}__{style}__size_{image_size}.jpg" + nst(args, content_image, style_image, name) + + +def weights_nst(args, content, style, size=500): + images = nst_images() + images.download(args.image_source_dir) + for weight in [1e0, 1e1, 1e2, 1e3]: + content_image = images[content].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + + name = f"nst_IST_paper_{content}__{style}__style_weight_{weight}.jpg" + nst( + args, + content_image, + style_image, + name, + style_weight=weight, + starting_point="random", + num_steps=1000, + ) + + +def style_generation_nst(args, size=500): + style = "starry_night" + images = nst_images() + images.download(args.image_source_dir) + content_image = images[style].read(size=size, device=args.device) + style_image = images[style].read(size=size, device=args.device) + + name = f"nst_IST_paper__{style}__generated.jpg" + nst( + args, + content_image, + style_image, + name, + content_weight=0, + starting_point="random", + num_steps=1000, + ) + + +def parse_input(): + # TODO: write CLI + image_source_dir = None + image_results_dir = None + device = None + + def process_dir(dir): + dir = path.abspath(path.expanduser(dir)) + os.makedirs(dir, exist_ok=True) + return dir + + here = path.dirname(__file__) + + if image_source_dir is None: + image_source_dir = path.join(here, "graphics", "images", "nst", "source") + image_source_dir = process_dir(image_source_dir) + + if image_results_dir is None: + image_results_dir = path.join(here, "graphics", "images", "nst", "results") + image_results_dir = process_dir(image_results_dir) + + device = misc.get_device(device=device) + logger = optim.OptimLogger() + + return Namespace( + image_source_dir=image_source_dir, + image_results_dir=image_results_dir, + device=device, + logger=logger, + ) + + +if __name__ == "__main__": + args = parse_input() + content = "bird" + style = "mosaic" + full_nst(args, content, style) + layer_nst(args, content, style) + # init_nst(args, content, style) + # iteration_nst(args, content, style) + # image_size_nst(args, content, style) + # weights_nst(args, content, style) + # style_generation_nst(args) diff 
--git a/rescience_paper/preamble.tex b/rescience_paper/preamble.tex new file mode 100644 index 00000000..ab5797bd --- /dev/null +++ b/rescience_paper/preamble.tex @@ -0,0 +1,120 @@ +\usepackage{standalone} +\usepackage{lscape} +\usepackage{amsfonts} +\usepackage{xspace} +\usepackage{hyperref} +\usepackage{multirow} +\usepackage{hyperref} +\usepackage{adjustbox} + +\usepackage{rotating,capt-of} + +\graphicspath{{graphics/}} + +% referencing commands +\newcommand*{\secname}{Section} +\newcommand*{\secref}[1]{\secname{}~\ref{#1}} + +\newcommand*{\figname}{Figure} +\newcommand*{\figref}[1]{\figname{}~\ref{#1}} + +\newcommand*{\tabname}{Table} +\newcommand*{\tabref}[1]{\tabname{}~\ref{#1}} + +\newcommand*{\appname}{Appendix} +\newcommand*{\appref}[1]{\appname{}~\ref{#1}} + +\newcommand{\urlfootnote}[3]{\footnote{#1, \url{#2}, retrieved on #3.}} +\newcommand{\urlfootnotename}[4]{\footnote{#1, \href{#2}{#3}, retrieved on #4.}} +% glossaries +\usepackage[acronyms]{glossaries} +\newcommand{\newacr}[4][]{\newacronym[ + sort={\ifthenelse{\isempty{#1}}{#2}{#1}}, + ]{#2}{#3}{#4}} +\glsdisablehyper +\defglsdisplayfirst[\acronymtype]{\emph{\glsentrylong{\glslabel}} (\glsentryshort{\glslabel})} + +\newacr{NST}{NST}{Neural Style Transfer} +\newacr{DLF}{DLF}{Deep Learning Framework} +\newacr{CV}{CV}{Computer Vision} +\newacr{GAN}{GAN}{Generative Adversarial Network} +\newacr{DL}{DL}{Deep Learning} +\newacr{ML}{ML}{Machine Learning} +\newacr{CNN}{CNN}{Convolutional Neural Network} +\newacr{MSE}{MSE}{mean squared error} +\newacr{SE}{SE}{squared error} +\newacr{MRF}{MRF}{\textsc{Markov} random field} +\newacr{VGG}{VGG}{Visual Geometry Group} +\newacr{GPU}{GPU}{Graphics Processing Unit} + +% helpful commands +\newcommand*{\eqq}[1]{``#1''} + +% math commands +\newcommand{\realnumbers}{\ensuremath{\mathbb{R}}} + +\renewcommand{\matrix}[1]{\ensuremath{\boldsymbol{\mathrm{#1}}}} +\newcommand{\tensor}[1]{\ensuremath{\boldsymbol{\mathsf{#1}}}} + +\newcommand{\transformer}{\ensuremath{\mathcal{T}}} + +\newcommand{\paper}{\texttt{Pa\-per}} +\newcommand{\implementation}{\texttt{Im\-ple\-men\-ta\-ti\-on}} + +\newcommand{\image}{\tensor{I}} +\newcommand{\contentimage}{\ensuremath{\image_\text{C}}} +\newcommand{\styleimage}{\ensuremath{\image_\text{S}}} +\newcommand{\loss}{\ensuremath{\mathcal{L}}} +\newcommand{\contentloss}{\ensuremath{\mathcal{L}_{\text{C}}}} +\newcommand{\styleloss}{\ensuremath{\mathcal{L}_{\text{S}}}} + +\newcommand{\mean}{\ensuremath{\overline{\sum}}} +\newcommand{\transpose}[1]{\ensuremath{#1^T}} +\newcommand{\argmin}[2]{\ensuremath{\underset{#1}{\text{arg min}}\:#2}} +\newcommand{\argmax}[2]{\ensuremath{\underset{#1}{\text{arg max}}\:#2}} + +\newcommand{\eqspace}{\,} +\newcommand{\Eqspace}{\quad} +\newcommand{\eqcommasep}{,\eqspace} + +\newcommand{\eqtextdot}{\ensuremath{\text{.}}} +\newcommand{\eqtextcomma}{\ensuremath{\text{,}}} +\newcommand{\eqtextand}{\ensuremath{\text{and}}} +\newcommand{\eqtextwith}{\ensuremath{\text{with}}} + +\makeatletter +\DeclarePairedDelimiter{\@parentheses}{(}{)} +\newcommand{\parentheses}[1]{\ensuremath{\@parentheses*{#1}}} +\makeatother +\newcommand{\etal}[1]{\textsc{#1} et al.\xspace{}} +\newcommand{\of}[1]{\parentheses{#1}} +\newcommand{\fun}[2]{\ensuremath{\text{#1}\of{#2}}} + + +\newcommand{\spatvec}[1]{\fun{spatvec}{#1}} +\newcommand{\spatvecinv}[1]{\fun{spatvec$^{-1}$}{#1}} +\newcommand{\gram}[1]{\fun{gram}{#1}} + +\newcommand{\imagecredits}[3]{\bigskip#1, #2 \\\url{#3}} +\newcommand{\imagecreditshref}[4]{\bigskip#1, #2 \\\href{#3}{#4}} + + +% footnote
referenced multiple times +\newcommand{\footlabel}[2]{% + \addtocounter{footnote}{1}% + \footnotetext[\thefootnote]{% + \addtocounter{footnote}{-1}% + \refstepcounter{footnote}\label{#1}% + #2% + }% + $^{\ref{#1}}$% +} + +\newcommand{\footnoteref}[1]{% + $^{\ref{#1}}$% +} + +\usepackage{pifont} +\newcommand{\cmark}{\ding{51}}% +\newcommand{\tcmark}{(\ding{51})}% +\newcommand{\xmark}{\ding{55}}% diff --git a/rescience_paper/sections/acknowledgements.tex b/rescience_paper/sections/acknowledgements.tex new file mode 100644 index 00000000..7f485402 --- /dev/null +++ b/rescience_paper/sections/acknowledgements.tex @@ -0,0 +1,3 @@ +\section{Acknowledgements} + +The authors would like to thank the authors of the replicated papers for their helpful explanations and for providing implementation details about the original works. This contribution is part of the project Fused Security Features, which is funded by the Ministry for Culture and Science of North Rhine-Westphalia (MKW NRW) under the Grant ID 005-1703-0013. \ No newline at end of file diff --git a/rescience_paper/sections/appendix.tex b/rescience_paper/sections/appendix.tex new file mode 100644 index 00000000..8eebc0d3 --- /dev/null +++ b/rescience_paper/sections/appendix.tex @@ -0,0 +1,1034 @@ +\newpage +\section{Appendix} + +\subsection{Image credits} \label{app:image_credits} + +\textcolor{red}{TODO: complete when all images are complete} + +We have used images in this work that are marked as public domain or fall under licences such as the Creative Commons CC0 licence. These licences allow the images to be used in a scientific work and allow modifications of the content. All images that are not accredited come from the author of this study. + +A single image may appear several times in this list with different sources. This is because some of these images are included in the repositories of the original publications, and in those cases we use the repository versions. 
Below is a list of the images and the corresponding figures in which the images appear: + +\imagecredits{\figref{fig:fig_nst}}{Bird}{https://pixabay.com/photos/colorful-bird-rainbow-parakeet-1311016/} + +\imagecredits{\figref{fig:fig_nst}}{Mosaic}{https://pixabay.com/photos/mosaic-stones-structure-template-755722/} + +\imagecredits{\figref{fig:generated_style_nst} and \figref{fig:GEB2016_fig_2}}{Vincent van Gogh, \eqq{The Starry Night}}{https://en.wikipedia.org/wiki/File:Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg} + +\imagecredits{\figref{fig:GEB2016_fig_2} and \figref{fig:ulyanov_et_al_2016_impl}}{Neckarfront Tübingen}{https://commons.wikimedia.org/wiki/File:Tuebingen_Neckarfront.jpg} + +\imagecredits{\figref{fig:GEB2016_fig_2}}{William Turner, \eqq{The Shipwreck of the Minotaur}}{https://www.artble.com/imgs/c/d/1/98090/the_shipwreck_of_the_minotaur.jpg} + +\imagecredits{\figref{fig:GEB2016_fig_2}}{Edvard Munch, \eqq{The Scream}}{https://commons.wikimedia.org/wiki/File:The_Scream.jpg} + +\imagecredits{\figref{fig:GEB2016_fig_2}}{Pablo Picasso, \eqq{ Figure dans un Fauteuil}}{https://en.wikipedia.org/wiki/File:Pablo_Picasso,_1909-10,_Figure_dans_un_Fauteuil_(Seated_Nude,_Femme_nue_assise),_oil_on_canvas,_92.1_x_73_cm,_Tate_Modern,_London.jpg} + +\imagecredits{\figref{fig:GEB2016_fig_2}}{Wassily Kandinsky, \eqq{Composition VII}}{https://de.wikipedia.org/wiki/Datei:Vassily_Kandinsky,_1913_-_Composition_7.jpg} + +\imagecredits{\figref{fig:LW2016_fig6}}{Portrait1}{https://raw.githubusercontent.com/chuanli11/CNNMRF/master/data/content/potrait1.jpg} + +\imagecredits{\figref{fig:LW2016_fig6}}{Pablo Picasso, \eqq{Self-portrait}}{https://raw.githubusercontent.com/chuanli11/CNNMRF/master/data/style/picasso.jpg} + +\imagecredits{\figref{fig:LW2016_fig6}}{Wassily Kandinsky, \eqq{Composition VIII}}{https://www.wassilykandinsky.net/work-50.php} + +\imagecreditshref{\figref{fig:LW2016_fig6}}{\emph{theilr}, \eqq{S}}{https://www.flickr.com/photos/theilr/9270411440/in/photolist-f8chm1-2di8AWD-8fQR9T-7rFKyH-saiwW-zpadej-qWGN8h-5XPnhU-c8xQQU-4LfwCV-xkpyx-u7btA-6tQSSb-dTWGR5-A3KxB-c8xRy1-wuvjG-bWCTQF-8iWWZX-A3Kzp-udguk-wuAMP-duvkYh-fBiXrd-zSCmK-6KRmEh-dsHKin-wuvhg-cxQ5Zq-dfjcZ4-dfjd4a-8iWQmX-d6HCqC-7oroQq-62PsFn-dHpQ8U-dG72Bj-wuNFS-37MqGY-dRk9Md-4fjyzH-4Gikhb-36xtSU-A3KB2-moS7H-dzaoJi-y6uLQ-dS55AH-5ANWkG-zSCrd}{Flickr Link} + +\imagecredits{\figref{fig:GEB+2017_fig2}}{Watertown}{https://ae01.alicdn.com/img/pb/136/085/095/1095085136_084.jpg} + +\imagecredits{\figref{fig:GEB+2017_fig2}}{Vincent van Gogh, \eqq{Wheat Field with Cypresses}}{https://de.wikipedia.org/wiki/Datei:Wheat-Field-with-Cypresses-(1889)-Vincent-van-Gogh-Met.jpg} + +\imagecredits{\figref{fig:GEB+2017_fig3}}{Garden at the Schultenhof in Mettingen}{https://commons.wikimedia.org/wiki/File:Schultenhof_Mettingen_Bauerngarten_8.jpg} + +\imagecredits{\figref{fig:GEB+2017_fig3}}{Vincent van Gogh, \eqq{Starry Night Over the Rhône}}{https://commons.wikimedia.org/wiki/File:Starry_Night_Over_the_Rhone.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig}}{Chicago}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/content/chicago.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig}}{Mosaic}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/styles/mosaic.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig}}{Vincent van Gogh, \eqq{The Starry Night}}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/styles/starry_night.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig} and 
\figref{fig:ulyanov_et_al_2016_impl}}{candy}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/styles/candy.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig}}{Edvard Munch, \eqq{The Scream}}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/styles/the_scream.jpg} + +\imagecredits{\figref{fig:johnson_alahi_li_2016_fig}}{feathers}{https://github.com/jcjohnson/fast-neural-style/blob/master/images/styles/feathers.jpg} + +\imagecredits{\figref{fig:ulyanov_et_al_2016_impl}}{cat}{https://github.com/DmitryUlyanov/texture_nets/blob/texture_nets_v1/data/readme_pics/kitty.jpg} + +\imagecredits{\figref{fig:ulyanov_et_al_2016_impl}}{bird}{https://github.com/DmitryUlyanov/texture_nets/blob/texture_nets_v1/supplementary/bird.jpg} + +\imagecredits{\figref{fig:ulyanov_et_al_2016_impl}}{William Turner, \eqq{The Shipwreck of the Minotaur}}{https://github.com/DmitryUlyanov/texture_nets/blob/texture_nets_v1/supplementary/stylization_models/turner.jpg} + +\imagecredits{\figref{fig:ulyanov_et_al_2016_impl}}{pleades}{https://github.com/DmitryUlyanov/texture_nets/blob/texture_nets_v1/supplementary/stylization_models/pleades.jpg} + +\imagecredits{\figref{fig:ulyanov_et_al_2016_impl}}{Marie-Lan Nguyen, \eqq{Mosaic ducks Massimo}}{https://github.com/DmitryUlyanov/texture_nets/blob/texture_nets_v1/supplementary/stylization_models/mosaic.jpg} + +\newpage +\subsection{Gatys, Ecker, and Bethge 2016} \label{sec:parameters_appendix} + +\begin{figure}[H] +\vspace{19cm} +\begin{center} + \begin{rotate}{90} + \footnotesize + \centering + \begin{tabular}{c|c|c} + \hline + \bfseries Parameter & \bfseries Implementation & \bfseries Paper\\ + \hline\hline + image size & 512 & 512 \\ + image resize & Resize with bilinear interpolation (Pillow) & Resize with bilinear interpolation (Pillow)\\ + starting point & "content" & "random" \\ + number steps & 500 & 500 \\ + Pretrained Encoder & VGG19 with caffe weights$^1$ & VGG19 with caffe weights$^1$ \\ + Pre- and Postprocessing & Caffe$^2$ & Caffe$^2$\\ + \hline + \bfseries Optimizer & &\\ + \hline + type & LBFGS & LBFGS\\ + learning rate & 1.0 & 1.0\\ + max iter & 1 & 1 \\ + \hline + \bfseries Content Loss & & \\ + \hline + layer & "relu4\_2" & "conv4\_2" \\ + score weight & 1e0 & 1e0 \\ + score correction factor & 1e0 & 1.0 / 2.0 \\ + loss reduction & "mean" & "sum" \\ + \hline + \bfseries Style Loss & & \\ + \hline + layer & ("relu1\_1", "relu2\_1", "relu3\_1", "relu4\_1", "relu5\_1") & ("conv1\_1", "conv2\_1", "conv3\_1", "conv4\_1", "conv5\_1") \\ + score weight & 1e3$^4$ & 1e3$^4$ \\ + score correction factor & 1e0 & 1.0 / 4.0 \\ + layer weights & (2.4e-04, 6.1e-05, 1.5e-05, 3.8e-06, 3.8e-06)$^3$ & "mean" \\ + \hline + \multicolumn{3}{l}{ }\\ + \multicolumn{3}{l}{\scriptsize{$^1$ Multi-layer encoder based on the VGG architecture that was introduced by Simonyan and Zisserman in \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^2$ Pre- and Postprocessing for the caffe weights \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^3$ The layer\_weights are computed by $1/n^2$ where $n$ denotes the number of channels of a feature map from the corresponding layer in the pretrained Encoder.}}\\ + \multicolumn{3}{l}{\scriptsize{$^4$ Different information for the content/style ratio is given for the different images, more detailed information can be found in the text of the results.}} + \end{tabular} +\end{rotate} +\captionof{table}{The hyperparameters used from \implementation{} and \paper{} of \textsc{Gatys}, \textsc{Ecker}, and 
\textsc{Bethge} $2016$ \cite{GEB2016}.} +\label{tab:GEB2016_parameters} +\end{center} +\end{figure} + +\newpage +\begin{table*}[!t] + \renewcommand{\arraystretch}{1.3} + \caption{An overview of the adapted parameters during the optimisation provided by the author \cite{GEB2016}.} + \label{tab:Gatys_2016_adapted_params} + \centering + \begin{tabular}{c|c} + \hline + \bfseries B & \\ + \hline + starting point & "random" \\ + number steps & 2000 \\ + style loss score weight & 1e6 \\ + content loss score weight$^1$ & 1e3\\ + \hline + \bfseries C & \\ + \hline + starting point & "random" \\ + number steps & 2000 \\ + style loss score weight & 1e6 \\ + content loss score weight$^1$ & 8e2\\ + \hline + \bfseries D & \\ + \hline + starting point & "random" \\ + number steps & 2000 \\ + style loss score weight & 1e6 \\ + content loss score weight$^1$ & 5e3\\ + \hline + \bfseries E & \\ + \hline + starting point & "random" \\ + number steps & 2000 \\ + style loss score weight & 1e6 \\ + content loss score weight$^1$ & 5e2\\ + \hline + \bfseries F & \\ + \hline + starting point & "random" \\ + number steps & 2000 \\ + style loss score weight & 1e6 \\ + content loss score weight$^1$ & 5e2\\ + \hline + \end{tabular} +\footnotesize{ + \\$^1$ This weighting $\alpha$ results from the weight ratio $\frac{\alpha}{\beta}$ given in the paper and the style loss score weight $\beta$. +} +\end{table*} + +\clearpage +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{A}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__B_paper.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/shipwreck_of_the_minotaur__turner.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{B}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__C_paper.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/starry_night__vincent_van_gogh.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{C}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__D_paper.jpg}}; + \node at (a.south west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/the_scream__edvard_munch.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{D}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__E_paper.jpg}}; + \node at (a.south 
west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/figure_dans_un_fauteuil__pablo_ruiz_picasso.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{E}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__F_paper.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/composition_vii__wassily_kandinsky.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{F}}; + \end{tikzpicture} + \end{minipage} + \caption{Replication of the Figure 3 in \cite{GEB2016} with the information given in \paper{}.} + \label{fig:GEB2016_fig_2_appendix_paper} +\end{figure} + +\pagebreak +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{A}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__B.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/shipwreck_of_the_minotaur__turner.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{B}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__C.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/starry_night__vincent_van_gogh.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{C}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__D.jpg}}; + \node at (a.south west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/the_scream__edvard_munch.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{D}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__E.jpg}}; + \node at (a.south west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/figure_dans_un_fauteuil__pablo_ruiz_picasso.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{E}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + 
\node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__F.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/composition_vii__wassily_kandinsky.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{F}}; + \end{tikzpicture} + \end{minipage} + \caption{Replication of the Figure 3 in \cite{GEB2016} with the information given in \implementation{}.} + \label{fig:GEB2016_fig_2_appendix_impleentation} +\end{figure} +\pagebreak +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{A}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__B__impl_params__author2.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/shipwreck_of_the_minotaur__turner.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{B}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__C__impl_params__author2.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/starry_night__vincent_van_gogh.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{C}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__D__impl_params__author2.jpg}}; + \node at (a.south west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/the_scream__edvard_munch.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{D}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__E__impl_params__author2.jpg}}; + \node at (a.south west)[anchor=center,xshift=5mm,yshift=5mm] + {\includegraphics[width=0.3\textwidth]{images/gatys_ecker_bethge_2016/source/figure_dans_un_fauteuil__pablo_ruiz_picasso.jpg}}; + \node at (a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{E}}; + \end{tikzpicture} + \end{minipage} + \hfill + \begin{minipage}[t]{0.45\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__F__impl_params__author2.jpg}}; + \node at (a.south west)[anchor=center,xshift=10mm,yshift=5mm] + {\includegraphics[width=0.45\textwidth]{images/gatys_ecker_bethge_2016/source/composition_vii__wassily_kandinsky.jpg}}; + \node at 
(a.north west)[anchor=center, xshift=-2.5mm, yshift=-5mm] {\large \textbf{F}}; + \end{tikzpicture} + \end{minipage} + \caption{Replication of the Figure 3 in \cite{GEB2016} with the information provided by the author.} + \label{fig:GEB2016_author_fig_2_appendix} +\end{figure} + +\newpage +\subsection{Gatys et al. 2017} +\begin{figure}[H] + \vspace{19cm} + \begin{center} + \begin{rotate}{90} + \footnotesize + \centering + \begin{tabular}{c|c|c} + \hline + \bfseries Parameter & \bfseries Implementation & \bfseries Paper\\ + \hline\hline + image size & 512 & (500, 1024)$^4$ \\ + image resize & Resize with bilinear interpolation (Pillow) & Resize with bilinear interpolation (Pillow)\\ + starting point & "content" & "content" \\ + number steps & 500 & (500, 200)$^4$ \\ + Pretrained Encoder & VGG19 with caffe weights$^1$ & VGG19 with caffe weights$^1$ \\ + Pre- and Postprocessing & Caffe$^2$ & Caffe$^2$\\ + \hline + \bfseries Optimizer & &\\ + \hline + type & LBFGS & LBFGS\\ + learning rate & 1.0 & 1.0\\ + max iter & 1 & 1 \\ + \hline + \bfseries Content Loss & & \\ + \hline + layer & "relu4\_2" & "conv4\_2" \\ + score weight & 1e0 & 1e0 \\ + loss reduction & "mean" & "mean" \\ + \hline + \bfseries Style Loss & & \\ + \hline + layer & ("relu1\_1", "relu2\_1", "relu3\_1", "relu4\_1", "relu5\_1") & ("conv1\_1", "conv2\_1", "conv3\_1", "conv4\_1", "conv5\_1") \\ + score weight & 1e3 & 1e3 \\ + score correction factor & 1e0 & 1.0 / 4.0 \\ + layer weights & (2.4e-04, 6.1e-05, 1.5e-05, 3.8e-06, 3.8e-06)$^3$ & (2.4e-04, 6.1e-05, 1.5e-05, 3.8e-06, 3.8e-06)$^3$ \\ + \hline + \multicolumn{3}{l}{ }\\ + \multicolumn{3}{l}{\scriptsize{$^1$ Multi-layer encoder based on the VGG architecture that was introduced by Simonyan and Zisserman in \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^2$ Pre- and Postprocessing for the caffe weights \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^3$ The layer\_weights are computed by $1/n^2$ where $n$ denotes the number of channels of a feature map from the corresponding layer in the pretrained Encoder.}}\\ + \multicolumn{3}{l}{\scriptsize{$^4$ An image pyramid with two levels has been used.}}\\ + \end{tabular} +\end{rotate} +\captionof{table}{The hyperparameters used from the implementation and paper of \etal{Gatys} $2017$ \cite{GEB+2017}.} +\label{tab:GEB+2017_parameters} +\end{center} +\end{figure} + +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook/building.png}}; + \end{tikzpicture}\\ + \centering + Content + \end{minipage} + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store/building.png}}; + \end{tikzpicture}\\ + \centering + Style I + \end{minipage} + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.75\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh.jpg}}; + \node at (a.north east)[anchor=north 
east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh/foreground.png}}; + \end{tikzpicture} + Style II + \end{minipage}\\ + \vspace{0.5cm} + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__d_paper.jpg} + \end{minipage}\\ + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__e_paper.jpg} + \end{minipage}\\ + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__f_paper.jpg} + \end{minipage} + \caption{Replication of the Figure 2 in \cite{GEB+2017} with the information given in \paper{}.} + \label{fig:GEB+2017_fig2_appendix_paper} +\end{figure} + +\begin{figure}[H] + \centering +\begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook/building.png}}; + \end{tikzpicture}\\ + \centering + Content +\end{minipage} +\begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store/building.png}}; + \end{tikzpicture}\\ + \centering + Style I +\end{minipage} +\begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.75\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh/foreground.png}}; + \end{tikzpicture} + Style II +\end{minipage}\\ + \vspace{0.5cm} + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__d.jpg} + \end{minipage}\\ + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__e.jpg} + \end{minipage}\\ + \begin{minipage}[t]{0.75\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__f.jpg} + \end{minipage} + \caption{Replication of the Figure 2 in \cite{GEB+2017} with the information given in \implementation{}.} + \label{fig:GEB+2017_fig2_appendix_implementation} +\end{figure} +\newpage +\subsection{Li and Wand 2016} +\begin{figure}[H] + \vspace{17cm} + \begin{center} + \begin{rotate}{90} + \footnotesize + \centering + \begin{tabular}{c|c|c} + \hline + \bfseries Parameter & \bfseries Implementation & \bfseries Paper\\ + \hline\hline + image size & 384 & 384 \\ + image resize$^3$ & Resize with bicubic interpolation (Pillow) & Resize with bicubic interpolation (Pillow)\\ + starting point & "content" & "random" \\ + number steps & & $^4$ \\ + Pretrained Encoder & VGG19 with caffe weights$^1$ & VGG19 with caffe weights$^1$ \\ + Pre- and Postprocessing & Caffe$^2$ & Caffe$^2$\\ + \hline + \bfseries Optimizer 
& &\\ + \hline + type & LBFGS & LBFGS \\ + learning rate & 1.0 & 1.0 \\ + max iter & 1 & 1 \\ + \hline + \bfseries Content Loss & & \\ + \hline + layer & "relu4\_1" & "relu4\_2" \\ + score weight & 2e1 & 1e0 \\ + loss reduction & "mean" & "sum" \\ + \hline + \bfseries Style Loss & & \\ + \hline + layer & ("relu3\_1", "relu4\_1") & ("relu3\_1", "relu4\_1") \\ + score weight & 1e-4 & 1e0 \\ + score correction factor & 1.0 / 2.0 & 1.0 \\ + loss reduction & "sum" & "sum" \\ + patch size & 3 & 3\\ + stride & 2 & 1\\ + \hline + \bfseries Regularization Loss & & \\ + \hline + score weight & 1e-3 & 1e-3 \\ + score correction factor & 1.0 / 2.0 & 1.0 \\ + loss reduction & "sum" & "sum" \\ + \hline + \bfseries Image Pyramid & & \\ + \hline + max edge size & 384 & 384 \\ + min edge size & 64 & 64 \\ + edge & "long" & "long" \\ + num steps & 100 & 200 \\ + num levels & 3 & None$^4$ \\ + \hline + \multicolumn{3}{l}{ }\\ + \multicolumn{3}{l}{\scriptsize{$^1$ Multi-layer encoder based on the VGG architecture that was introduced by Simonyan and Zisserman in \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^2$ Pre- and Postprocessing for the caffe weights \cite{SZ2015}.}}\\ + \multicolumn{3}{l}{\scriptsize{$^3$ For some of the images, further transformations, such as cropping and mirroring, are performed beforehand to get the same image as in the paper.}}\\ + \multicolumn{3}{l}{\scriptsize{$^4$ num levels=None implies that the number of levels is automatically calculated depending on max edge size and min edge size.}} + \end{tabular} +\end{rotate} +\captionof{table}{The hyperparameters used from the implementation and paper of \textsc{Li} and \textsc{Wand} $2016$ \cite{LW2016}.} +\label{tab:LW2016_parameters} +\end{center} +\end{figure} + +\subsection{Johnson, Alahi and Li 2016} + +\begin{figure}[H] + \vspace{17cm} + \begin{center} + \begin{rotate}{90} + \footnotesize + \centering + \begin{tabular}{c|c|c} + \hline + \bfseries Parameter & \bfseries Implementation & \bfseries Paper\\ + \hline\hline + content image size & (256, 256)$^6$ & (256, 256)$^6$ \\ + style image size & 256 for the long edge & 256 for the long edge\\ + image resize$^3$ & Resize with bilinear interpolation (Pillow) & Resize with bilinear interpolation (Pillow)\\ + Pretrained Encoder & VGG16 with caffe weights$^1$ & VGG16 with caffe weights$^1$ \\ + Pre- and Postprocessing & Caffe$^2$ & Caffe$^2$\\ + \hline + \bfseries Optimizer & &\\ + \hline + type & Adam & Adam \\ + learning rate & 1e-3 & 1e-3 \\ + number batches & 40000 & 40000\\ + batch size & 4 & 4 \\ + \hline + \bfseries Content Loss & & \\ + \hline + layer & "relu2\_2" & "relu2\_2" \\ + score weight & 1e0 & - \\ + \hline + \bfseries Style Loss & & \\ + \hline + layers & ("relu1\_2", "relu2\_2", "relu3\_3", "relu4\_3")& ("relu1\_2", "relu2\_2", "relu3\_3", "relu4\_3")\\ + additional factor & 5e0 & - \\ + \hline + \bfseries Regularization Loss & & \\ + \hline + score weight & 1e-6 & - \\ + loss function & squared error$^5$ & squared error$^5$ \\ + \hline + \hline + \bfseries Transformer & & \\ + \hline + value range delimiter & 150 * tanh(x) & sigmoid(2.0*x)\\ + \hline +\multicolumn{3}{l}{ }\\ +\multicolumn{3}{l}{\scriptsize{$^1$ Multi-layer encoder based on the VGG architecture that was introduced by Simonyan and Zisserman in \cite{SZ2015}.}}\\ +\multicolumn{3}{l}{\scriptsize{$^2$ Pre- and Postprocessing for the caffe weights \cite{SZ2015}.}}\\ +\multicolumn{3}{l}{\scriptsize{$^3$ For some of the images, further transformations, such as cropping and mirroring, are performed 
beforehand to get the same image as in the paper.}}\\ +\multicolumn{3}{l}{\scriptsize{$^4$ Normalize the loss additionally by the number of channels $n$.}}\\ +\multicolumn{3}{l}{\scriptsize{$^5$ This is in contrast to the \texttt{pystiche} module where the mean square error is used.}}\\ +\multicolumn{3}{l}{\scriptsize{$^6$ The images are first brought to a square size with a cropping of the upper left corner.}} +\end{tabular} +\end{rotate} +\captionof{table}{The hyperparameters used from the implementation and paper of \etal{Johnson} $2016$ \cite{JAL2016}.} +\label{tab:JAL2016_parameters} +\end{center} +\end{figure} +\clearpage +\begin{table*}[!t] + \renewcommand{\arraystretch}{1.3} + \caption{An overview of the changed parameters during the training of the provided trained models in \cite{JAL2016}.} + \label{tab:johnson_adapted_hyperparams} + \centering + \begin{tabular}{c|c} + \hline + \bfseries Candy & \\ + \hline\hline + instance norm & True \\ + style loss score weight & 1e1 \\ + regularization score weight & 1e-4\\ + style edge size$^1$ & 384\\ + \hline + \bfseries composition vii & \\ + \hline + instance norm & False \\ + style edge size$^1$ & 512\\ + number of batches & 60000\\ + \hline + \bfseries feathers & \\ + \hline + instance norm & True \\ + style loss score weight & 1e1 \\ + regularization score weight & 1e-5\\ + style edge size$^1$ & 180\\ + number of batches & 60000\\ + \hline + \bfseries mosaic & \\ + \hline + instance norm & True \\ + style loss score weight & 1e1 \\ + regularization score weight & 1e-5\\ + style edge size$^1$ & 512\\ + number of batches & 60000\\ + \hline + \bfseries starry night & \\ + \hline + instance norm & False \\ + style loss score weight & 3e0 \\ + regularization score weight & 1e-5\\ + style edge size$^1$ & 512\\ + \hline + \bfseries the scream & \\ + \hline + instance norm & True \\ + style loss score weight & 2e1 \\ + regularization score weight & 1e-5\\ + style edge size$^1$ & 384\\ + number of batches & 60000\\ + \hline + \end{tabular} + \footnotesize{ + \\$^1$ The edge size refers to the length of the smaller image dimension of a correspondingly scaled image size.} +\end{table*} +\clearpage + +\begin{figure}[H] + \begin{minipage}[t]{0.3\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/chicago.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/hoovertowernight.jpg} + \end{minipage} + \hfill \\ candy \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.7\textwidth]{images/johnson_alahi_li_2016/source/candy.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__candy__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__candy__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ composition vii \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/composition_vii.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + 
\includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__composition_vii__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__composition_vii__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ feathers \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.5\textwidth]{images/johnson_alahi_li_2016/source/feathers.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__feathers__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__feathers__impl_params__instance_norm.jpg} + \end{minipage} + + \hfill \\ mosaic \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/johnson_alahi_li_2016/source/mosaic.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__mosaic__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__mosaic__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ starry night \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/starry_night.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__starry_night__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__starry_night__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ the scream \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=.5\textwidth]{images/johnson_alahi_li_2016/source/the_scream.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__the_scream__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/hoovertowernight__the_scream__impl_params__instance_norm.jpg} + \end{minipage} + \label{fig:johnson_alahi_li_2016_fig_appendix} + \caption{Our stylisation results of the two content images (top row) due to the transformation of the models we trained in different styles with the approach of \cite{JAL2016}. 
The respective style images are shown in the left column.} +\end{figure} + +\begin{figure}[H] + \begin{minipage}[t]{0.3\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/chicago.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/hoovertowernight.jpg} + \end{minipage} + \hfill \\ candy \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.7\textwidth]{images/johnson_alahi_li_2016/source/candy.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__candy__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__candy__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ composition vii \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/composition_vii.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__composition_vii__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__composition_vii__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ feathers \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.5\textwidth]{images/johnson_alahi_li_2016/source/feathers.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__feathers__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__feathers__impl_params__instance_norm.jpg} + \end{minipage} + + \hfill \\ mosaic \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/johnson_alahi_li_2016/source/mosaic.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__mosaic__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__mosaic__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ starry night \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/starry_night.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__starry_night__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + 
\includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__starry_night__impl_params__instance_norm.jpg} + \end{minipage} + \hfill \\ the scream \hfill \\ + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=.5\textwidth]{images/johnson_alahi_li_2016/source/the_scream.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__the_scream__impl_params__instance_norm.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/hoovertowernight__the_scream__impl_params__instance_norm.jpg} + \end{minipage} + \label{fig:johnson_alahi_li_2016_author_model_appendix} + \caption{Our stylisation results of the two content images (top row) due to the transformation of the models in different styles provided by the authors of the original publication \cite{JAL2016}. The respective style images are shown in the left column.} +\end{figure} + +\subsection{Ulyanov et al. 2016} + +\begin{figure}[H] + \begin{minipage}[t]{0.24\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/kitty.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/bird.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/turner.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/turner_neckarfront_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/turner_kitty_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/turner_bird_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/pleades.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/pleades_neckarfront_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/pleades_kitty_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/pleades_bird_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/mosaic.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/mosaic_neckarfront_coco__impl_params.png} 
+ \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/mosaic_kitty_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/mosaic_bird_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/candy.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/candy_neckarfront_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/candy_kitty_coco__impl_params.png} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/candy_bird_coco__impl_params.png} + \end{minipage} + \caption{Our stylisation results of the content images (top row) due to the transformation of the models we trained in different styles with the approach of \cite{ULVL2016}. The respective style images are shown in the left column.} + \label{fig:ulyanov_et_al_2016_impl} +\end{figure} + diff --git a/rescience_paper/sections/conclusion.tex b/rescience_paper/sections/conclusion.tex new file mode 100644 index 00000000..8c1a9b2b --- /dev/null +++ b/rescience_paper/sections/conclusion.tex @@ -0,0 +1,3 @@ +\section{Conclusion} + +In this replication study, a few of the basic algorithms are replicated in \gls{NST}. For this purpose, the basic operation of these methods is first introduced. Based on the differences observed between the documented and the reference implementation, the differences found are then described, followed by a discussion and analysis of their impact on the result. This is supported with examples. In addition, a solution is proposed to reduce these problems due to the lack of standardisation. \ No newline at end of file diff --git a/rescience_paper/sections/discussion.tex b/rescience_paper/sections/discussion.tex new file mode 100644 index 00000000..bd394246 --- /dev/null +++ b/rescience_paper/sections/discussion.tex @@ -0,0 +1,35 @@ +\newpage +\section{Summary} +In this replication study, we replicated six well-known \gls{NST} papers. In all papers, the approaches of the original papers were clearly described, but they were not always sufficient to repeat the approach and produce stylised images. The reason for this are the differences between the approach described in \paper{} and the approach implemented in \implementation{} described in \secref{sec:replicability}. \tabref{tab:replication_results} shows an overview of replications that were successful, unsuccessful, and probably successful by other hyperparameters. +\begin{table*}[h] + \caption{Overview of replications that have been successfully replicated \cmark{}, unsuccessfully replicated \xmark{} and partially successfully replicated \tcmark{}. 
We suspect that the replications marked with \tcmark{} might have succeeded with different hyperparameters.} + \label{tab:replication_results} + \centering + \small + \begin{tabular}{c|c|c} + \hline + \bfseries Replication & \bfseries \paper{}& \bfseries \implementation{}\\ + \hline\hline + \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} \cite{GEB2016} & \xmark{} & \tcmark{}\\ + \textsc{Li} and \textsc{Wand} \cite{LW2016} & \xmark{} & \tcmark{}\\ + \etal{Gatys} \cite{GEB+2017} & \cmark{} & \cmark{}\\ + \etal{Johnson} \cite{JAL2016} & \xmark{} & \cmark{} \\ + \etal{Ulyanov} \cite{ULVL2016} & \xmark{} & \tcmark{} \\ + \etal{Ulyanov} \cite{UVL2017} & \xmark{} & \xmark{} \\ + \hline + \end{tabular} +\end{table*} + +We were able to replicate only one of the six papers successfully with the information from \paper{}. Even this successful replication showed small differences compared to the results obtained with \implementation{}. This is because the deviations had only a minor impact on the optimisation process and the additional factors were of a magnitude that hardly changed the style transfer. In the other five replications, the deviations from \implementation{} resulted either in no stylisation of the images at all or in unusable images. The reason for this is that the information given in \paper{} caused the training to fail, or that the additional or missing factors were of an order of magnitude that significantly altered the weighting between content and style. + +The replications based on \implementation{} were more successful. We were able to achieve well-stylised results for almost all replications. Overall, we were able to replicate five of the six papers with comparable results using \implementation{}. In the remaining cases, the results differed from the original results. These differences could not be explained solely by the random initialisation of the models or the starting point. As it turned out, they were mainly due to wrong hyperparameters. By communicating with the authors, we were able to improve the results or identify reasons such as a random starting point in the case of the replication results in \secref{sec:Gatys1results}. However, in many cases the hyperparameters used at that time could no longer be traced, which is why some of our results differ from those published by the original authors. + +The replications were straightforward to implement thanks to the comprehensible and traceable descriptions of the authors. With the preliminary work on the \texttt{pystiche} library it was possible to replicate the approaches in \texttt{PyTorch}, since the components required for the individual approaches are already integrated and only had to be assembled for the replication. This reduced the effort to the essential points that cannot be taken over by \texttt{pystiche}, such as the architecture of the transformer, the loading of the required data, and the specific settings and hyperparameters. In addition, due to the discrepancies described above, individual components from the library had to be modified to replicate the approaches as accurately as possible, which made the implementation more complex. This reusable code paves the way for future investigations of \gls{NST}. It gives researchers the ability to compare their approaches with others more easily. Finally, we hope that the code provided by the \texttt{pystiche} library will serve as a basis for new \gls{NST} approaches and that the results of this replication study will be used to compare such approaches. 
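To make concrete what \eqq{assembling the components} amounts to, the sketch below shows a minimal image-optimisation \gls{NST} of the kind that underlies the image-based replications. It is written directly against \texttt{PyTorch} and \texttt{torchvision} rather than against \texttt{pystiche}'s actual API, and the layer indices, weights, step count, and the omitted caffe pre- and postprocessing are illustrative simplifications, not the exact settings of any replication.

```python
# Schematic image-optimisation NST: multi-layer encoder, content loss, style loss and an
# LBFGS loop over the pixels. Illustrative sketch only; neither pystiche's API nor the
# exact hyperparameters of any of the replicated papers.
import torch
import torchvision
from torch.nn.functional import mse_loss

device = "cuda" if torch.cuda.is_available() else "cpu"
# VGG19 feature stack; the weight-loading syntax assumes a recent torchvision version.
encoder = torchvision.models.vgg19(weights="IMAGENET1K_V1").features.to(device).eval()
for param in encoder.parameters():
    param.requires_grad_(False)

# Indices of relu1_1, relu2_1, relu3_1, relu4_1, relu5_1 and relu4_2 in torchvision's VGG19.
style_layers, content_layer = [1, 6, 11, 20, 29], 22

def extract(image):
    feats, x = {}, image
    for idx, module in enumerate(encoder):
        x = module(x)
        feats[idx] = x
        if idx == max(style_layers + [content_layer]):
            break
    return feats

def gram(feat):
    b, c, h, w = feat.shape
    flat = feat.view(b, c, h * w)
    return flat @ flat.transpose(1, 2) / (c * h * w)

# Placeholders; in the replications these are the loaded and preprocessed images.
content_image = torch.rand(1, 3, 256, 256, device=device)
style_image = torch.rand(1, 3, 256, 256, device=device)
target_content = extract(content_image)[content_layer].detach()
target_grams = {l: gram(extract(style_image)[l]).detach() for l in style_layers}

input_image = content_image.clone().requires_grad_(True)  # "content" starting point
optimizer = torch.optim.LBFGS([input_image], lr=1.0, max_iter=1)
style_weight = 1e3

for _ in range(500):
    def closure():
        optimizer.zero_grad()
        feats = extract(input_image)
        content_score = mse_loss(feats[content_layer], target_content)
        style_score = sum(mse_loss(gram(feats[l]), target_grams[l]) for l in style_layers)
        total = content_score + style_weight * style_score
        total.backward()
        return total
    optimizer.step(closure)
```

In the replications themselves, \texttt{pystiche} provides these building blocks (multi-layer encoder, content and style losses, and the optimisation loop) ready-made, so only the approach-specific parts mentioned above had to be written or adapted.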
+ + + + + + + + diff --git a/rescience_paper/sections/introduction.tex b/rescience_paper/sections/introduction.tex new file mode 100644 index 00000000..77c3ec31 --- /dev/null +++ b/rescience_paper/sections/introduction.tex @@ -0,0 +1,27 @@ +\section{Introduction} +Humans have always been attracted and inspired by the art of painting. This attraction is explained by the interplay between the content and style of the painting, with which the artists create extraordinary visual experiences \cite{Glas2021}. Whether it is possible to teach this ability to a machine is still unknown, since solving this task with a traditional supervised learning approach is largely impractical. This is because the pairs of images needed to train a \gls{ML} model, the original image and an artistic representation of that image, rarely exist \cite{JAL2016}. + +In recent years, however, artificial systems based on \gls{DL} algorithms have emerged that generate artistic images of high perceptual quality. This technique of recomposing images in the style of other images is called \gls{NST} and was introduced by \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} in $2016$ \cite{GEB2016}. An example of a content image that receives the style, a style image from which the style is transferred, and the resulting \gls{NST} image can be seen in \figref{fig:fig_nst}. + +The transformative power of this technique is that almost anyone can create and share an artistic masterpiece. This allows people all over the world to experiment with their own creativity \cite{Kel2018}. On +\begin{figure*}[h] + \centering + \includestandalone{nst/example_images} + \caption{Example of a Neural Style Transfer with the content image \contentimage{} (left), the style image \styleimage{} (middle) and the stylised image \image{} (right).} + \label{fig:fig_nst} +\end{figure*} +the other hand, the significance can also be observed in the commercial art world. In $2018$, an Artificial Intelligence artwork featured by Christie's sold at one of their auctions for $\$432{,}500$ \cite{2018}. Furthermore, the transfer of styles to recorded and live video opens many doors in the areas of design, content creation, and creative tool development. For example, \gls{NST} can be applied in various ways to photo and video editors, virtual reality, gaming, and commercial art \cite{Glas2021, Ioa2021}. + +These many possible applications have led to \gls{NST} becoming a trending topic in the academic literature in recent years. This is underlined by the number of citations of the initial paper \eqq{Image style transfer using convolutional neural networks} \cite{GEB2016}. In the past $6$ years, according to Google Scholar\urlfootnotename{Overview of the papers in which the paper or preprint is referenced}{https://scholar.google.de/scholar?oi=bibs&hl=en&cites=15430064963552939126,6343685530593283491,788840246532963346,18334534842043149041}{Google Scholar Link}{30.08.2022}, this paper has been cited over $6200$ times. + +The problem with such a rapid development of publications is the lack of standardisation, which is usually introduced only after the initial boom. Without standardisation, the approaches exist in different programming languages as well as different \glspl{DLF}. This makes a direct combination impossible and a comparison of the methods difficult, even though the authors of the methods provide the source code of their approaches. 
+ +During the review of the reference implementations, we also noticed that there are discrepancies between the published algorithms and the provided implementations. These range from incorrectly specified or unspecified hyperparameters and minor implementation errors to significant changes in the algorithms compared to the publications. This makes an exact replication of the results more difficult or even impossible. We suggest that the discrepancies are not deliberate deceptions, but rather artefacts of the lack of standardisation. + +Based on these observations, we took two steps. First, we introduced a standardisation for the implementation of \gls{NST}. For this purpose, the public \gls{NST} library \texttt{pystiche} was created \cite{ML2020}. This library requires only minimal prior knowledge about \gls{NST} and \gls{DL}, yet is flexible enough to combine the different approaches without limiting the scope of action. The \texttt{pystiche} library is based on and is fully compatible with \texttt{PyTorch} \cite{PGM+2019}. + +In a second step, we replicated known \gls{NST} approaches using \texttt{pystiche}. In addition to the initial paper by \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} \cite{GEB2016}, we have replicated other image-based approaches in which a single image is stylised using an optimisation algorithm \cite{LW2016,GEB+2017}. Furthermore, we have replicated model-based \gls{NST} approaches \cite{JAL2016,ULVL2016,UVL2017}, where a model is learned that can subsequently be used to stylise arbitrary content images. The replicated methods cover a wide range of current approaches and can serve as a basis for new ones. In addition, the replication implementations facilitate the comparison of new methods with existing ones. + +In this replication study, we have replicated a total of six well-known \gls{NST} approaches based on the \texttt{pystiche} library. Since discrepancies between the paper and the reference implementation were found in each case, we replicated each approach in two different ways: one replication using only the information from the paper (\paper{}) and one using the default information from the reference implementation (\implementation{}). This highlights the open problem of the lack of replicability and underlines once again the need for \gls{NST} libraries like \texttt{pystiche} beyond this study. + +The paper is structured as follows. In the next section, the basic functionality of \gls{NST} is introduced. In the third section, the implementations reproduced in this study and the reasons for their selection are briefly presented. The fourth section explains the replication methodology and how we dealt with the discrepancies between the implementations and the papers. Finally, the results and their significance are discussed. \ No newline at end of file diff --git a/rescience_paper/sections/methology.tex b/rescience_paper/sections/methology.tex new file mode 100644 index 00000000..0841834f --- /dev/null +++ b/rescience_paper/sections/methology.tex @@ -0,0 +1,114 @@ +\section{Methodology of Replicability} +In this study, we have implemented the \gls{NST} approaches presented in \secref{sec:replicated_paper} based on the \gls{NST} library \texttt{pystiche} \cite{ML2020} in \texttt{PyTorch}. 
Although the authors have incorporated further images from additional experiments in their papers, we have only replicated the images that show the result of the underlying approach, in order to determine whether the replication of the approach is successful.
+
+As mentioned earlier, we found differences between the approach published in the paper and the reference implementation in all replications. In most replications, these differences have an impact on the result: either no stylisation takes place at all, the result looks visually different, or only unusable images are created. For this reason, the next section explains which differences were found and how we dealt with them. Then the information on the images and datasets used for each approach is given. Finally, we discuss the computational requirements of this study.
+
+\subsection{Parameter deviations} \label{sec:replicability}
+We replicated each approach once with the information from the paper and once with the default values from the reference implementation. An overview of the parameters used can be found in the tables in \appref{sec:parameters_appendix}. The tables contain the information published by the authors in their paper and the default values from the implementation. Furthermore, a detailed documentation of the hyperparameters used can be found in the documentation of \texttt{pystiche/papers}\footnoteref{docs}.
+
+The deviations between the parameters described in the paper and the original implementation are marked by comments. In addition, links are provided to the location of the deviation in an archived version of the original implementation at the time of this replication study. The following designation is chosen to distinguish between the original paper and the reference implementation:
+\begin{itemize}
+    \item \paper{} -- Refers to the replication only with the information from the original paper.
+    \item \implementation{} -- Refers to the replication only with the information from the reference implementation.
+\end{itemize}
+
+In general, the deviations can be divided into two categories. The first category, behavioral changes, probably results from the authors' misconceptions about how the framework or library they used works internally. The second category, \eqq{hyperparameters}, results from differing specifications between the documentation and the finally implemented approach.
+
+Another influencing factor for the second category is the high number of different hyperparameters needed for the implementation. These range from the I/O periphery of the images, over the applied pre- and postprocessing methods, to the specific hyperparameters of the loss functions, which makes accurate documentation complex. Information on both types of changes can be found for each paper implementation in the respective \eqq{Behavior Changes} and \eqq{Hyperparameters} sections of the documentation.
+
+In the following, we describe these categories and subcategories in more detail and explain how we deal with them. The examples do not appear in all replications, but serve only to illustrate the deviations observed in this replication study.
+
+\subsubsection{Behavioral changes}
+The behavioral changes are modified parameters that result from different specifications in \paper{} and \implementation{}. However, these parameters are not intended to change the actual style transfer.
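+
+As an illustration of how small such a behavioural change can look in code while still shifting the scale of the loss, the following sketch compares a summed squared error with a mean squared error on the same feature tensors. The snippet is purely illustrative and is not taken from any of the reference implementations.
+\begin{verbatim}
+import torch
+
+# Illustrative only: the same feature difference evaluated as a summed
+# squared error (SE) and as a mean squared error (MSE). Both express the
+# same distance, but their scale differs by the number of elements, which
+# shifts the effective weighting between content and style.
+features_image = torch.rand(1, 256, 64, 64)
+features_target = torch.rand(1, 256, 64, 64)
+
+se = (features_image - features_target).pow(2).sum()
+mse = (features_image - features_target).pow(2).mean()
+
+print(se / mse)  # equals the number of elements, here 1 * 256 * 64 * 64
+\end{verbatim}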
+
+Errors of this type include, for example, the use of the \gls{MSE} in \implementation{}, while the \gls{SE} is specified in \paper{}. This category also includes additional normalisations, such as a division by the number of channels, as well as additional pre-factors. The latter are mainly due to additional factors specified in the loss function of the paper, which make the expression of the gradient simpler but are not integrated by default in the loss functions of the \gls{DLF} used in \implementation{}.
+
+An example of this is a factor $1/2$, which cancels out the $2$ from the square term in the gradient of the \gls{MSE} loss function. All these examples have an influence on the calculation of the loss functions. This changes the weighting between the content and the style, which can result in differences, as shown in \figref{fig:weighting_nst}.
+
+Another example of the behavioral changes concerns modifications of the transformer structure in the model-based approaches. These can be different specifications of the value range delimiter for the transformer output or different activation functions (LeakyReLU instead of ReLU). There may also be differences in the basic structure of the transformer through additional or fewer blocks than specified in \paper{}. These differences influence what kind of transformation is learned by the transformer. They thus have an influence on the behaviour of the algorithm, but are not intended to directly influence the style transfer.
+
+All these behavioural changes are binary: either the variant from \paper{} or the one from \implementation{} is used. We have therefore implemented both variants for the respective replication, and it is possible to switch between them.
+
+\subsubsection{Hyperparameters}
+In contrast to the behavioral changes, the parameters in the category \eqq{hyperparameters} are freely selectable and can positively influence the style transfer when adapted. These parameters include the learning rate, the layers used from the encoder, the loss weights of the individual loss functions, the number of optimisation steps, as well as other approach-specific parameters.
+
+Unfortunately, the information about the parameters used differs between \paper{} and \implementation{}. The differences range from deviating values and differing information about the used interpolation or layers to missing information about a value altogether. However, these parameters are essential for accurate replication, as shown in \secref{sec:preliminaries}.
+
+One reason for the differences is that it is difficult to properly document all these parameters and to know exactly which parameters were used for a particular image or model, as the code is constantly changing before and sometimes after release. It can also become complex to correctly document all normalisations and mean calculations in the formulas in \paper{}, which may result in additional or missing factors.
+
+An example of this category is the choice of layers used from the pre-trained network. There are different statements about the depth of the layers, the number of layers used, and the feature map, i.e.\ whether the features before or after the activation function are selected. The fact that the choice of layers has an influence on the result has already been shown in \figref{fig:diff_depth_nst}.
+
+Another observation in this category concerns differing information about important settings, such as the choice of the starting point and the number of iteration steps.
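+
+The following sketch shows the two starting points most commonly encountered in the replicated approaches. The function name and the interface are ours and purely illustrative; which variant was actually used is often only documented in \implementation{}.
+\begin{verbatim}
+import torch
+
+# Two common choices for the starting point of the synthesised image.
+# Which one was used is often not stated in the paper, although it
+# clearly changes the result. Purely illustrative helper.
+def initial_image(content_image: torch.Tensor, mode: str) -> torch.Tensor:
+    if mode == "content":
+        return content_image.clone()           # start from the content image
+    if mode == "random":
+        return torch.rand_like(content_image)  # start from random noise
+    raise ValueError(f"unknown initialisation mode: {mode}")
+\end{verbatim}
+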
The starting point and the number of iterations are essential parameters for accurate replication, as shown in \figref{fig:iteration_nst} and \figref{fig:diff_init_nst}. However, it can happen that such an important parameter is not specified in \paper{} and can only be taken from \implementation{}, or that two fundamentally different values are given.
+
+All these parameters have an influence on the final result and are necessary to determine whether the replication is correct and whether differences are due to rounding errors or different initialisations. For this reason, similar to the behavioral changes, we have used the information given in \paper{} and \implementation{} as default values. An overview of the parameters used for the respective replications can be found in the tables in \appref{sec:parameters_appendix}. In the case of missing information, we have used the available information for both variants.
+
+Since these parameters are intended to improve the stylisation, it is also possible that different parameters have been used for a given style image. These variations from the default values are indicated for the respective images or model trainings.
+
+\subsubsection{Pre-trained Encoder}
+Another important point for the replication of \gls{NST} approaches is the choice of the pre-trained encoder $E$. There are different architectures that can be used for the calculation of the loss functions. The most commonly used models are those of the \gls{VGG} family \cite{SZ2015}. Some \glspl{DLF} provide their own pre-trained weights for these networks. It is therefore important to choose the same settings as in the original approach. Otherwise, the features used for the optimisation of the images do not match and the optimisation process cannot be reproduced.
+
+This also applies to the necessary pre- and postprocessing steps for these pre-trained encoders. This data preparation involves a standardisation with the network-specific mean and standard deviation. However, the replicated papers rarely provide information on whether and when this step is performed. This makes it difficult to trace this step, especially with model-based approaches, so that different implementations can occur.
+
+We have used and initialised the encoders according to the sources cited in the original papers. Pre- and postprocessing are implied by the pre-trained encoders used, so we have integrated these steps at the places where the original authors performed them.
+
+\subsubsection{Transformation of the images}
+The images in the datasets or the images used in the image-based methods often do not exist in the size used for the approach. The images are therefore transformed to the required size. Sometimes the transformation is also necessary or useful to make the approach work at all or to provide usable images, for example images with a square image size of $2^{m}$ with $m \in \mathbb{Z}^{+}$, so that the transformer does not change the image size due to discretisation.
+
+However, \paper{} often provides little or no information about these steps. In \implementation{}, on the other hand, the images are brought to the final image size by several transformations, such as resizing and cropping. This also applies at training time: the training images are modified during training by numerous image augmentations. This means that the used dataset is not only resized to the right size, but also artificially enlarged by random transformations, such as rotation, translation, and scaling, as well as random changes in the lighting or brightness of the images.
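+
+The following sketch contrasts the minimal preprocessing we fall back to with an augmentation pipeline of the kind found in some reference implementations. The concrete operations and parameter values in the augmentation pipeline are illustrative assumptions and do not correspond to the exact settings of any particular approach.
+\begin{verbatim}
+from torchvision import transforms
+
+# Minimal preprocessing used when no further information is given:
+# resize the smaller image side to the target size with bilinear
+# interpolation.
+minimal = transforms.Resize(
+    256, interpolation=transforms.InterpolationMode.BILINEAR
+)
+
+# Sketch of a training-time augmentation pipeline as found in some
+# reference implementations; operations and values are illustrative.
+augmentation = transforms.Compose([
+    transforms.RandomResizedCrop(256),       # random scaling and cropping
+    transforms.RandomRotation(degrees=10),   # random rotation
+    transforms.ColorJitter(brightness=0.2),  # random brightness changes
+    transforms.ToTensor(),
+])
+\end{verbatim}
+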
These augmentations in particular may have an influence on the way the style is presented, as the style is then available in different sizes and orientations during training. The effects of different image sizes have already been shown in \figref{fig:diff_size_nst}. As a result, it is not possible to trace exactly how the used image was created. In addition, the training in the model-based methods is altered by the artificial enlargement of the dataset, which may also have an influence on the result.
+
+For this replication study, we have re-implemented these transformations and use them for \implementation{}. However, since this step is comprehensible even without further information, we fall back to a minimal process if no details are given: the images are resized to the required image size with a bilinear interpolation.
+
+\subsection{Data} \label{sec:data}
+The authors of the original approaches use different content and style images, as well as different datasets for the training of the models in the model-based methods. In order to compare the replicated images, we use the images from the original papers for the verification of the replication.
+
+Some of the images are available in the original GitHub repository. The images that have not been made accessible have been selected as a result of an image search. Links and license information are available in the implementation for these images. We have only used images whose license allows them to be used in a scientific work. An overview of the sources for the content and style images used in this study can be found in \appref{app:image_credits}.
+
+For the training of the model-based methods, the different datasets used for the respective implementation are listed in \tabref{tab:datasets}. Each training image is preprocessed according to the steps specified in \paper{} or in \implementation{}. This includes, for example, image augmentation and cropping of the images. Information on this can be found in the tables in \appref{sec:parameters_appendix}.
+
+The following datasets are required for this replication study:
+
+\begin{itemize}
+    \item The Microsoft Common Objects in Context (MS COCO) dataset contains 164K images. The size of the dataset is 25 GB \cite{LMB+2014}.\urlfootnote{Source}{https://cocodataset.org}{28.09.2021}
+    \item The Large Scale Visual Recognition Challenge 2012 (ILSVRC2012) dataset is a subset of ImageNet containing 1000 categories and 1.2 million images. The size of the dataset is 154.6 GB \cite{RDS+2015}.\urlfootnote{Source}{https://image-net.org/challenges/LSVRC/2012/}{28.09.2021}
+\end{itemize}
+
+\begin{table*}[h]
+    \caption{Overview of the datasets required for the replicated papers.}
+    \label{tab:datasets}
+    \centering
+    \small
+    \begin{tabular}{c|c}
+        \hline
+        \bfseries Methods & \bfseries Dataset\\
+        \hline\hline
+        \etal{Johnson} \cite{JAL2016} & MS COCO \\
+        \etal{Ulyanov} \cite{ULVL2016} & ILSVRC2012$^1$ \\
+        \etal{Ulyanov} \cite{UVL2017} & ILSVRC2012 \\
+        \hline
+    \end{tabular}
+\footnotesize{
+    \\$^1$ Only the validation set is specified in \implementation{}.
+}
+\end{table*}
+
+\subsection{Computational requirements}
+
+All our final replication scripts were executed on a machine with a Titan RTX CUDA GPU with 24 GB of memory. A GPU with a lot of memory is particularly needed for the model-based approaches. Our CUDA version is $11.7$ and the Nvidia driver version is $515.43.04$.
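+
+The sketch below is a short sanity check of such an environment; the printed values are machine-specific, and the comments merely reflect our own setup.
+\begin{verbatim}
+import torch
+
+# Sanity check of the replication environment; on our machine the printed
+# values correspond to the versions reported above and in the table below.
+print(torch.__version__)   # PyTorch version, e.g. 1.12.1
+print(torch.version.cuda)  # CUDA toolkit PyTorch was built against
+if torch.cuda.is_available():
+    print(torch.cuda.get_device_name(0))  # the GPU used for the runs
+\end{verbatim}
+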
The implementation also requires different libraries, which are listed in \tabref{tab:libraries} with the versions used. In addition, the datasets used must be available.
+
+\begin{table*}[h]
+    \caption{Overview of the libraries used with the version used at the time of replication.}
+    \label{tab:libraries}
+    \centering
+    \small
+    \begin{tabular}{c|c}
+        \hline
+        \bfseries Library & \bfseries Version\\
+        \hline\hline
+        torch & 1.12.1\\
+        torchvision & 0.13.1\\
+        pystiche & 1.0.1\\
+        pillow & 9.2.0\\
+        numpy & 1.21.6\\
+        more-itertools & 8.14.0\\
+        \hline
+    \end{tabular}
+\end{table*} \ No newline at end of file diff --git a/rescience_paper/sections/preliminaries.tex b/rescience_paper/sections/preliminaries.tex new file mode 100644 index 00000000..3839675e --- /dev/null +++ b/rescience_paper/sections/preliminaries.tex @@ -0,0 +1,246 @@ +\section{Preliminaries} \label{sec:preliminaries}
+In the \gls{NST} technique, the result can be adapted to a style and positively influenced via several parameters and settings \cite{GEB2016}. However, the replication has shown that there are differences in the specifications of these parameters between the implementation and the approach described in the original work. These deviations can even lead to significant visual differences in the result.
+
+For this reason, this section gives an overview of the basic functioning of the \gls{NST}. The aim is to show which hyperparameters exist and what influence they have on the result. For this purpose, the role of pre-trained \glspl{CNN} from image processing for the creation of feature representations within the \gls{NST} is explained first. On this basis, the loss function for the optimisation of the \gls{NST} is introduced for the image-based approaches. Subsequently, the additional parameters for the model-based \gls{NST} approaches are introduced.
+
+\subsection{Encoder} \label{sec:encoder}
+For the recognition of objects in images, features are needed with which the objects can be distinguished. This task is simple for a human. For example, a bird can be recognised in an image if the object has features such as feathers, two wings, and a beak. For a machine, on the other hand, this task is complex because it has to extract the features from the pixel data \cite{Alp2020}.
+
+Manually programming the features required for object recognition so that they can be recognised in all their different forms, if this is possible at all, is not sensible \cite{Alp2020}. However, \glspl{CNN} are a category of neural networks that have proven to be very effective in areas such as image recognition and classification \cite{SZ2015,He2016}. They have been successfully applied to image processing problems such as face and object identification. In a \gls{CNN}, convolutional layers followed by an activation function perform the feature extraction task. This eliminates the need to manually select features and allows the algorithm to decide for itself which features are useful to separate the content of the images \cite{Alp2020}.
+
+These layers are stacked on top of each other to form a deep pyramidal layout. In this layout, the input image is transformed into representations that increasingly capture the actual content of the image \cite{ZF2014}. Fully connected layers follow these feature extraction layers to perform classification using the softmax activation function \cite{Alp2020}.
+
+A \gls{CNN} that has already been trained to recognise objects in images has thus learned to assign features to the content of a given image.
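+
+In the sketch below, the convolutional part of a pre-trained \gls{VGG} network from \texttt{torchvision} is reused as such a feature extractor. The weights are \texttt{torchvision}'s ImageNet weights, which are not necessarily the weights used by the original authors, and the helper function is ours and purely illustrative.
+\begin{verbatim}
+import torch
+from torchvision import models
+
+# Minimal sketch: reuse the convolutional part of a pre-trained VGG19 as
+# encoder E. Assumption: torchvision's ImageNet weights, which are not
+# necessarily the weights used by the original authors.
+encoder = models.vgg19(pretrained=True).features.eval()
+
+def encode(image: torch.Tensor, depth: int) -> torch.Tensor:
+    # Pass the image through the first `depth` layers and return the
+    # intermediate feature maps used for the content and style losses.
+    features = image
+    for module in list(encoder.children())[:depth]:
+        features = module(features)
+    return features
+\end{verbatim}
+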
The \gls{NST} algorithms are characterised by using this feature extraction or encoder $E$ of the \gls{CNN} to extract representations of content and style \cite{Glas2021}. This is because using the features from a pre-trained \gls{CNN} has improved the quality of the synthesis in comparison to features in pixel-space~\cite{EL1999} or in a hand-crafted feature space~\cite{PS2000}. Therefore, for the \gls{NST}, the features of the intermediate convolutional layers of the encoder $E$ are used up to a layer $l$ for the algorithmic identification of the content \cite{GEB2016}. + +Besides the architecture and the weights used, the image size of the input images also has an influence on this extracted representation. An example of style transfer with the same parameters but two different image sizes can be seen in \figref{fig:diff_size_nst}. It can be seen that the result differs due to the different extracted content and style representations of the encoder $E$. +\begin{figure}[h] + \centering + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__size_256.jpg} + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__size_512.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.5\textwidth} + \centering + Result with image size $256$ + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + Result with image size $512$ + \end{minipage} + \caption{The result of a Neural Style Transfer with the same hyperparameters but a different image size of the images for the optimisation. The image size refers to the smaller image side for both the content image and the style image. On the left the result is shown with an image size of $256$ and on the right with an image size of $512$.} + \label{fig:diff_size_nst} +\end{figure} + +\subsection{Image-based Neural Style Transfer} +The technical principle of the image-based \gls{NST} is to define two distance functions and minimise their distances. The \emph{content loss function} \contentloss{} describes how different the content is between the synthesised image \image{} and the content image \contentimage{}. The \emph{style loss function} \styleloss{}, on the other hand, describes the distance of the synthesised image \image{} from the style image \styleimage{} with respect to their style \cite{Glas2021}. + +The aim of the \gls{NST} is to synthesise an image by keeping the distance to these two functions as small as possible. Or mathematically, a minimisation of these functions is carried out \cite{Glas2021}. These two loss functions are merged for the minimisation by a weighted sum. The complete loss function \loss{} for the \gls{NST} is defined as follows \cite{GEB2016}: +\begin{equation*} + \loss\of{\image\eqcommasep \contentimage\eqcommasep \styleimage} = \lambda_\text{C} \cdot \contentloss\of{\image\eqcommasep \contentimage} \eqspace+\eqspace \lambda_\text{S} \cdot \styleloss\of{\image\eqcommasep \styleimage} \eqtextdot +\end{equation*} +The weights $\lambda_\text{C}$ and $\lambda_\text{S}$ are hyperparameters that influence the weighting between the content and the style \cite{GEB2016}. + +An example of the result with different weightings can be seen in \figref{fig:weighting_nst}. The figure shows the result of the \gls{NST} due to a stylisation with a constant content weight but different weights for the style. 
With a small style weight, the content is more dominant from the content image. However, the higher the weighting of the style, the more dominant is the representation of the style in the result. +\begin{figure}[h] + \centering + \begin{minipage}{.25\textwidth} + \centering + \includegraphics[width=.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__style_weight_1.0.jpg} + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + \includegraphics[width=.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__style_weight_10.0.jpg} + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + \includegraphics[width=.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__style_weight_100.0.jpg} + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + \includegraphics[width=.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__style_weight_1000.0.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.25\textwidth} + \centering + $\lambda_\text{S} = 1$ + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + $\lambda_\text{S} = 10$ + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + $\lambda_\text{S} = 100$ + \end{minipage}% + \begin{minipage}{.25\textwidth} + \centering + $\lambda_\text{S} = 1000$ + \end{minipage} + \caption{The result of a Neural Style Transfer with different weights between the content loss function and the style loss function. From left to right, the results with a content weight of $\lambda_\text{C} = 1$ and a style weight of $\lambda_\text{S} = \{1, 10, 100, 1000\}$ are shown.} + \label{fig:weighting_nst} +\end{figure} + +The optimisation of such a loss function \loss{} is a central part of \gls{ML} \cite{Alp2020}. In \gls{NST}, the same optimisation algorithms are used to synthesise the new image \image{} by iteratively adjusting its pixel values. A general definition for the optimisation can be defined as follows \cite{GEB2016}: +\begin{equation*} + \argmin{\image}{\loss\of{\image\eqcommasep \contentimage\eqcommasep \styleimage}} \eqtextdot +\end{equation*} +The pixel values are adjusted with each iteration with a step size of the so-called learning rate so that the loss function becomes smaller. With each iteration, a result image is thus created, which represents a better \gls{NST} result after the definition of the loss functions. + +\figref{fig:iteration_nst} shows the results of an \gls{NST} procedure created with a different number of iterations. It can be seen that this makes the content of the content image clearer in the progress of the iterations. It should be noted that the optimisation converges and there are no longer any significant visual differences after a certain number of steps. This is also the explanation for the fact that the difference between the results of $500$ and $1000$ steps is small. + +Besides the iterations, the starting point of the synthesised image \image{} has an influence on the result \cite{GEB2016}. The influence of a different initialisation can be seen in \figref{fig:diff_init_nst}. The result is shown once with a noise image and once with an initialisation with the content image. It can be seen that there are significant differences in the results. The initialisation with the content image is often used, as it allows the optimisation process to converge more quickly and produces subjectively better results in relation to the content of the image \cite{GEB2016}. 
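+
+A minimal sketch of this pixel optimisation is given below. The functions \texttt{content\_loss} and \texttt{style\_loss} stand in for the loss terms introduced above, and all numeric values (weights, learning rate, number of steps) are illustrative rather than the settings of any replicated approach.
+\begin{verbatim}
+import torch
+
+# Minimal sketch of the image-based optimisation loop. `content_loss` and
+# `style_loss` are placeholders for the loss terms defined above; all
+# numeric values are illustrative.
+def image_based_nst(content_image, style_image, content_loss, style_loss,
+                    weight_content=1.0, weight_style=1e3,
+                    steps=500, lr=1e-1, init="content"):
+    if init == "content":
+        image = content_image.clone()           # start from the content image
+    else:
+        image = torch.rand_like(content_image)  # start from random noise
+    image.requires_grad_(True)
+    optimizer = torch.optim.Adam([image], lr=lr)
+    for _ in range(steps):
+        optimizer.zero_grad()
+        loss = (weight_content * content_loss(image, content_image)
+                + weight_style * style_loss(image, style_image))
+        loss.backward()   # gradients with respect to the pixel values
+        optimizer.step()  # adjust the pixels of the synthesised image
+    return image.detach()
+\end{verbatim}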
+\begin{figure}[h] + \centering + \begin{minipage}{.33\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__iteration_100.jpg} + \end{minipage}% + \begin{minipage}{.33\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__iteration_500.jpg} + \end{minipage}% + \begin{minipage}{.33\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__iteration_1000.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.33\textwidth} + \centering + Result after $100$ iteration + \end{minipage}% + \begin{minipage}{.33\textwidth} + \centering + Result after $500$ iteration + \end{minipage}% + \begin{minipage}{.33\textwidth} + \centering + Result after $1000$ iteration + \end{minipage} + \caption{The result of a Neural Style Transfer with a different number of optimisation steps. From left to right, the result image is shown after $100$, $500$, and $1000$ iterations.} + \label{fig:iteration_nst} +\end{figure} + +\begin{figure}[h] + \centering + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__start_content.jpg} + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__start_random.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.5\textwidth} + \centering + Initialisation with content image + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + Initialisation with random noise image + \end{minipage} + \caption{The result of a Neural Style Transfer with the same hyperparameters but a different initialisation of the synthesised image for the optimisation. The left side shows the result of an initialisation with the content image. The right side shows the result of an initialisation with a random noise image.} + \label{fig:diff_init_nst} +\end{figure} + +The aim of calculating the \emph{content loss function} \contentloss{} is to hide unnecessary details and determine only the distance to the actual content of the image \cite{Glas2021}. As mentioned earlier, the features obtained from the higher layers of the encoder $E$ can be considered more related to the content of the image. Therefore, for the calculation of this loss, the features of content image and input image passed through the encoder $E$ to the layer $l_C$ are compared. The difference between these images is calculated by the \gls{MSE} between the extracted features with \cite{GEB2016} +\begin{equation*} + \contentloss\of{\image\eqcommasep \contentimage} = \mean \parentheses{E^{l_\text{C}}\of{\image} - E^{l_\text{C}}\of{\contentimage}}^2 \eqtextdot +\end{equation*} +The overlined sum symbol without indices denotes the mean value of the following normalised tensors. This mean value is also known as grand sum. + +The calculation of the \emph{style loss function} \styleloss{} is similar to the \emph{content loss function} \contentloss{} with two differences. First, the features of several layers from the encoder $E$ are used. The reason for this is that it transfers style elements of different size and detail \cite{Glas2021}. The stylisation using different layers for the \emph{style loss function} \styleloss{} can be seen in \figref{fig:diff_depth_nst}. In this style, the colour in particular makes up the style in the first layers. 
In the deeper layers, however, more complex structures of the mosaic texture are transferred. The overall result with all four depths on the right is therefore a combination of these style details. +\begin{figure}[h] + \centering + \begin{minipage}{.2\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__relu1_1.jpg} + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__relu2_2.jpg} + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__relu3_1.jpg} + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__relu4_1.jpg} + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper_bird__mosaic__full.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.2\textwidth} + \centering + Depth $1$ + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + Depth $2$ + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + Depth $3$ + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + Depth $4$ + \end{minipage}% + \begin{minipage}{.2\textwidth} + \centering + Depth $1-4$ + \end{minipage}% + \caption{The result of a Neural Style Transfer using style representations from four different depths as well as the result with all four layers simultaneously is shown.} + \label{fig:diff_depth_nst} +\end{figure} + +Another difference is that the features are not used directly. The reason for this is that the global arrangement or the content of the image is not important for the style of an image. The style of an image rather refers to how colours and shapes are used to create forms \cite{Glas2021}. To demonstrate this, \figref{fig:generated_style_nst} shows two images that have a similar style according to the definition of the \emph{style loss function} from \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} \cite{GEB2016}. Shown is the original style image \emph{The Starry Night} by \emph{Vincent van Gogh} and the generated style image. It can be seen that the generated style image has the same colours and contours as the original image. But the actual content like the houses in the lower part of the image are ignored. +\begin{figure}[h] + \centering + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/source/starry_night__vincent_van_gogh.jpg} + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + \includegraphics[width=0.95\linewidth]{graphics/images/nst/results/nst_IST_paper__starry_night__generated.jpg} + \end{minipage}\\ \vspace{0.15cm} + \begin{minipage}{.5\textwidth} + \centering + Original style image + \end{minipage}% + \begin{minipage}{.5\textwidth} + \centering + Generated style image + \end{minipage} + \caption{An original style image \emph{The Starry Night} by Vincent van Gogh (left) and a generated image (right) created with the style loss function according to \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge}\cite{GEB2016} that has a similar style are shown.} + \label{fig:generated_style_nst} +\end{figure} +In \gls{NST} there are various methods to capture these objective style properties. 
Since the calculation in detail is not relevant here, we refer to the individual approaches for further details \cite{GEB2016,LW2016}. However, to indicate that the features are not used directly, the function $g(\cdot)$ is used. The style loss $\mathcal{L}_{\text{S, g}\eqcommasep l_\text{S}}$ on a layer $l_\text{S}$ can thus be determined as follows \cite{GEB2016}:
+\begin{equation*}
+    \mathcal{L}_{\text{S, g}\eqcommasep l_\text{S}}\of{\tensor{I}\eqcommasep \tensor{I}_\text{S}} = \mean \parentheses{g\of{E^{l_\text{S}}\of{\tensor{I}}} - g\of{E^{l_\text{S}}\of{\tensor{I}_\text{S}}}}^2\eqtextcomma
+\end{equation*}
+The total style loss $\mathcal{L}_\text{S}$ is finally obtained by a weighted sum of the losses on the selected layers with \cite{GEB2016}
+\begin{equation*}
+    \mathcal{L}_\text{S}\of{\tensor{I}\eqcommasep \tensor{I}_\text{S}} = \sum_{\substack{l_\text{S} \in\\L_\text{S, g}}} \lambda_{l_\text{S}} \cdot \mathcal{L}_{\text{S, g}\eqcommasep l_\text{S}}\of{\tensor{I}\eqcommasep \tensor{I}_\text{S}} \eqtextdot
+\end{equation*}
+
+In summary, in the image-based approaches, the stylised images are created iteratively by an optimisation process. This process has to be performed for each individual image, which means that these approaches are slow, but that the results can be adapted more closely. For this adaptation, there are several parameters or setting options with which the result can be influenced. These can be used to improve or adjust the result for a style image. However, for a replication of results, it is important to use the same hyperparameters in order to achieve at least comparable results. Otherwise, the results will show significant deviations, and it will not be possible to determine whether the replication was successful.
+
+\subsection{Model-based Neural Style Transfer}
+In contrast to the image-based \gls{NST}, the model-based \gls{NST} can be used to convert any content image into a stylised image without performing the optimisation for each individual image. The reason for this is that no single image is optimised, but a model or transformer \transformer{} consisting of several layers. The principle of the model-based \gls{NST} is therefore to learn a transformation with the transformer that converts any input image into an artistic representation \cite{JAL2016}.
+
+For this purpose, a dataset is needed for training so that the transformer can learn to apply the stylisation to different new images. The choice of dataset depends on the domain of the images to be transformed. As a rule, datasets with images similar to those that are to be transformed after the training are used. This means, for example, that if portraits are to be stylised, a dataset with portraits is used for the training. The reason for this is that it does not make sense to learn a transformation for landscapes if this transformation is used for portraits after the training \cite{JAL2016,ULVL2016,UVL2017}.
+
+The loss for the optimisation can be calculated using the procedures from the image-based \gls{NST}. However, during training, several images can be used simultaneously to calculate an average adjustment of the weights \cite{Alp2020}. The number of images processed simultaneously is called the batch size and can help the training to converge faster. The optimisation in this case aims to adjust the weights of the transformer in an iterative process to achieve better result images. The weights are initialised randomly.
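+
+The following sketch shows a single training step of such a transformer. The names \texttt{transformer} and \texttt{perceptual\_loss} are placeholders for the transformation network and the combined content and style loss from the image-based setting; the snippet is illustrative and is not the training loop of any specific reference implementation.
+\begin{verbatim}
+import torch
+
+# Minimal sketch of one training step for a model-based NST. The
+# `transformer` is the image transformation network; `perceptual_loss`
+# stands for the combined content and style loss. Illustrative only.
+def training_step(transformer, optimizer, perceptual_loss,
+                  content_batch, style_image):
+    optimizer.zero_grad()
+    stylised_batch = transformer(content_batch)  # stylise a whole batch
+    loss = perceptual_loss(stylised_batch, content_batch, style_image)
+    loss.backward()    # gradients w.r.t. the transformer weights
+    optimizer.step()   # update the transformer, not the images
+    return loss.item()
+\end{verbatim}
+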
More formally, the optimisation in the model-based \gls{NST} can be defined as follows \cite{JAL2016}:
+\begin{equation*}
+    \argmin{\transformer}{\loss\of{\image\eqcommasep \contentimage\eqcommasep \styleimage}} \eqtextdot
+\end{equation*}
+After successful training, this transformer can then be used to transform any image without further optimisation \cite{JAL2016}.
+
+Various architectures exist for the structure of the transformer \transformer{}. These differ between the approaches. Basically, the architecture consists of blocks made up of a convolution layer, an activation function, and a normalisation layer. By stacking these blocks together, the transformer is able to learn the complex task of stylisation. The exact architectures are not described here; we refer to the respective publications \cite{JAL2016,ULVL2016,UVL2017}. However, a change to the described architecture through additional or modified layers has an influence on the transformation and can thus affect the result.
+
+In summary, in model-based approaches, a model is trained in advance, which can then stylise any input image. The same loss functions as in the image-based methods can be used for training. Additional training parameters, the dataset, and the architecture of the transformer are added to the parameters from the image-based method. Due to the random initialisation of the weights in the transformer, it is not to be expected that the results in the replication are identical. \ No newline at end of file diff --git a/rescience_paper/sections/replicated_Paper.tex b/rescience_paper/sections/replicated_Paper.tex new file mode 100644 index 00000000..d21c86cc --- /dev/null +++ b/rescience_paper/sections/replicated_Paper.tex @@ -0,0 +1,39 @@ +\section{Replicated Paper} \label{sec:replicated_paper}
+For the \gls{NST} algorithms presented in \secref{sec:preliminaries}, there are a large number of different publications and small improvements of individual algorithms. For this reason, this study replicates the basic algorithms that introduced new, innovative approaches. The approaches differ essentially by category, i.e.\ whether they are image-based or model-based. An overview of the replicated algorithms can be found in \tabref{tab:used_methods}.
+
+As can be seen from \tabref{tab:used_methods}, another major difference between the individual approaches is the choice of style model. The style of an image includes structural elements such as brushstrokes, colours, and contrast. Following \author{Zhou} \cite{Zho2006}, these texture features can be described with two different approaches:
+\begin{itemize}
+    \item a stochastic approach assumes that if the global statistics of the images match, the underlying textures also match
+    \item a structural approach assumes that a texture can be described by the regular or irregular joining of small patches
+\end{itemize}
+
+There are style models in \gls{NST} for both approaches. For this reason, the algorithm that initiated the \gls{NST} has been replicated first: \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} follow an approach based on global statistics \cite{GEB2016}. In contrast, the approach of \textsc{Li} and \textsc{Wand} covers the fundamentally different structural approach \cite{LW2016}.
+
+To increase the usefulness of the \gls{NST} as an artistic tool, it is important that the user can easily and intuitively control the output of the algorithm.
For this reason, the approach from \etal{Gatys} \cite{GEB+2017} has also been replicated. They show that their method enables controlled stylisation and helps mitigate common error cases such as applying ground textures to sky regions. This means that a region-to-region condition can be added to the style transfer by using a mask. This additional condition ensures that, for example, the eye in the content image is stylised in the same way as the eye in a painting and that unwanted artefacts due to local fitting errors are reduced.
+
+\begin{table*}[b]
+    \renewcommand{\arraystretch}{1.3}
+    \caption{Overview of replicated methods, their categorisation and the model used for the style.}
+    \label{tab:used_methods}
+    \centering
+    \resizebox{\textwidth}{!}{%
+    \begin{tabular}{c|c|c}
+        \hline
+        \bfseries Methods & \bfseries Categorisation & \bfseries Style Model\\
+        \hline\hline
+        \textsc{Gatys}, \textsc{Ecker}, and \textsc{Bethge} \cite{GEB2016} & & stochastic \\
+        \textsc{Li} and \textsc{Wand} \cite{LW2016} & image-based & structural\\
+        \etal{Gatys} \cite{GEB+2017} & & stochastic with additional region condition\\
+        \hline
+        \etal{Johnson} \cite{JAL2016} & \multirow{2}{*}{model-based} & stochastic\\
+        \etal{Ulyanov} \cite{ULVL2016,UVL2017} & & stochastic\\
+        \hline
+    \end{tabular}}
+\end{table*}
+
+The previous image-based methods are very slow because an optimisation problem must be solved iteratively for each individual image. This is why model-based \gls{NST} algorithms exist. The first two model-based approaches were proposed by \etal{Johnson} \cite{JAL2016} and \etal{Ulyanov} \cite{ULVL2016}, respectively. They differ only in the transformer architecture, for which \textsc{Johnson} et al.~design a feed-forward network, while \textsc{Ulyanov} et al.~use a pyramid-shaped structure.
+
+While models of the feed-forward network type by \textsc{Johnson} et al.~work with only one image resolution, the pyramid-shaped structure works with multiple resolutions simultaneously. It is also worth noting that \textsc{Ulyanov} et al.~found in their $2017$ follow-up publication that replacing batch normalisation with instance normalisation and applying instance normalisation during both training and testing leads to even faster performance and arguably more aesthetically pleasing results \cite{UVL2017}. Since the two publications differ essentially only in the choice of normalisation, this paper is also included in the replication study.
+
+The procedures briefly presented above are part of the replication study and are published as a Python package \texttt{pystiche/papers}. The source code\footnote{\url{https://github.com/pmeier/pystiche_papers}} as well as the documentation\footlabel{docs}{\url{https://pystiche-papers.readthedocs.io}} of \texttt{pystiche/papers} are available online. \ No newline at end of file diff --git a/rescience_paper/sections/results.tex b/rescience_paper/sections/results.tex new file mode 100644 index 00000000..181da44a --- /dev/null +++ b/rescience_paper/sections/results.tex @@ -0,0 +1,769 @@ +\section{Results}
+In this section, the results of the replications are presented. For this purpose, the results for each replicated approach from \secref{sec:replicated_paper} are presented in a figure. This figure contains, in addition to the content and style images, the results of the \gls{NST} created with \paper{} and \implementation{}. For a direct comparison, the results from the original publication are also shown, provided we have obtained permission from the copyright holder.
Alternatively, results are shown with further information from the authors, or the results produced with the published models are shown.
+
+We have also created a replica of the corresponding figure in the original paper for the respective results from \paper{} and \implementation{}. Where appropriate, individual content or style images have been replaced in cases where the copyright could not be traced. The replicas can be found in the appendix.
+
+\subsection{Gatys, Ecker, and Bethge 2016} \label{sec:Gatys1results}
+The results of the replicated paper \cite{GEB2016} are shown in \figref{fig:GEB2016_fig_2}. The results with \paper{} did not visibly change the content image and therefore did not yield a meaningful stylised result. The reason for this is the different weighting between content and style, because in this case we have omitted an additional standardisation by the number of channels due to the missing specification in \paper{}. This standardisation factor corresponds to a value of $1/n^2$, where $n$ stands for the number of channels and takes values of $n \in \{128, 256, 512\}$. A missing factor of this order of magnitude can therefore lead to a lack of stylisation with this approach.
+
+However, there are also differences compared to the original publication with \implementation{}, which are not only due to rounding errors on the \gls{GPU} or initialisation conditions. We assume this because we do not use random initialisation as a starting point and there are clear differences in the dominance of the style compared to the original publication, which are comparable to the effects shown in \figref{fig:weighting_nst} for different weightings between content and style.
+
+For this reason, we contacted the authors of the original publication to obtain information about the hyperparameters. The adjustments of the hyperparameters provided by the author are shown in \tabref{tab:Gatys_2016_adapted_params} in the appendix. The results with the adjusted hyperparameters are shown in \figref{fig:GEB2016_fig_2} in the right column. A major change is that a random initialisation of the starting point is used, which means that an exact reproduction of the images is no longer possible without knowledge of the starting point used at that time. However, the results in \figref{fig:GEB2016_random_init} with different starting points demonstrate that we can achieve plausible results with the information provided by the author. We therefore assume that the replication of this paper is successful. This is supported in particular by the replication of a follow-up paper by the same authors, in which we were able to achieve very similar results with the content image as the starting point (see \figref{fig:GEB+2017_fig2}).
+\begin{figure}[H] + \centering + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}\\ + content image + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \hfill + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/shipwreck_of_the_minotaur__turner.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__B_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__B.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__B__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{images/gatys_ecker_bethge_2016/source/starry_night__vincent_van_gogh.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__C_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__C.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__C__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=0.6\textwidth]{images/gatys_ecker_bethge_2016/source/the_scream__edvard_munch.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__D_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__D.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__D__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=0.57\textwidth]{images/gatys_ecker_bethge_2016/source/figure_dans_un_fauteuil__pablo_ruiz_picasso.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__E_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__E.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__E__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + 
\includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/source/composition_vii__wassily_kandinsky.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__F_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/fig_2__F.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__F__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + Style image + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \paper{} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + \implementation{} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.24\textwidth} + \centering + Author + \end{minipage} + \caption{Our stylised results with \paper{} and \implementation{} according to the method from \cite{GEB2016}. The content image of the Neckarfront in Tübingen, Germany, on which the style transfer was applied, is in the first column. Below it, in the left column, are the respective style images, followed by the result with \paper{} and \implementation{}. In the last column are our results created with the information provided by the author of the original approach.} + \label{fig:GEB2016_fig_2} +\end{figure} + + +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__C__impl_params__author.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__C__impl_params__author2.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.3\textwidth} + \centering + \includegraphics[width=\textwidth]{images/gatys_ecker_bethge_2016/results/author/fig_2__C__impl_params__author3.jpg} + \end{minipage} + \caption{Our results of the method according to \cite{GEB2016} with the information provided by the author of the original approach. For the individual images, different random noise images have been selected as starting point for the style transfer. All other hyperparameters are the same in these images.} + \label{fig:GEB2016_random_init} +\end{figure} + + +\subsection{Li and Wand 2016} +\figref{fig:LW2016_fig6} shows our results from Figure 6 of the original publication in \cite{LW2016}. We use a different content image in the lower row, as the original image was not freely available. However, it can be seen that there are significant differences between \paper{} and \implementation{}. While the parameters from \implementation{} create useful stylised images, the style dominates when the parameters from the paper are used. This example shows how big the difference in the results can be due to the differences between \paper{} and \implementation{}. + +There are also significant differences to the original release. If the image from the top row is compared with the corresponding image from the original work, the result shows less darker artefacts, which in the original result appear mainly in the hair and background. 
We therefore contacted the author, who provided helpful explanations and said that subsequent changes to the code may result in differences from the release. Therefore, we suspect that the difference from the original implementation is due to these changes and that this is a successful replication of the stylised results. + +\begin{figure}[H] + \centering + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/source/blue_bottle__christopher_michel_(cmichel67).jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/source/self-portrait__pablo_ruiz_picasso.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/results/fig_6__top_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/results/fig_6__top.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/source/s__theilr.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/source/composition_viii__wassily_kandinsky.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/results/fig_6__bottom_paper.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \includegraphics[width=\textwidth]{images/li_wand_2016/results/fig_6__bottom.jpg} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + Content image + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + Style image + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \paper{} + \end{minipage} + \hfill% + \begin{minipage}[t]{0.23\textwidth} + \centering + \implementation{} + \end{minipage} + \caption{Replication of the results in Figure 6 in \cite{LW2016}. In left column are the content images, followed by the style images and the respective results with \paper{} and \implementation{}.} + \label{fig:LW2016_fig6} +\end{figure} + +\subsection{Gatys et al. 2017} \label{sec:Gatys2results} +The replicated results are shown in \figref{fig:GEB+2017_fig2}. The results show that there is only a minimal difference between our results with \implementation{} and \paper{} as well as in comparison to the original publication. We are thus able to successfully replicate the published images. The differences in this case consist mainly of small additional pre-factors of $1/2$ or $1/4$. These therefore have only a minor effect on the result. + +The result also shows that the differences in \secref{sec:Gatys1results} arise due to the use of a random starting point, since in this case the results can be replicated with the same approach and the content image as starting point (see the second column in \figref{fig:GEB+2017_fig2}). 
+\begin{figure}[H] + \centering + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/house_concept_tillamook/building.png}}; + \end{tikzpicture}\\ + \centering + Content image + \end{minipage}\hfill% + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.9\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/watertown__shop602835_store/building.png}}; + \end{tikzpicture}\\ + \centering + Style image I + \end{minipage}\hfill% + \begin{minipage}[t]{0.25\textwidth} + \centering + \vspace{0pt} + \begin{tikzpicture} + \node(a){\includegraphics[width=0.75\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh.jpg}}; + \node at (a.north east)[anchor=north east,xshift=0mm,yshift=0mm] + {\includegraphics[width=0.2\textwidth]{images/gatys_et_al_2017/source/wheat_field_with_cypresses__vincent_van_gogh/foreground.png}}; + \end{tikzpicture} + Style image II + \end{minipage} +\hfill% +\vspace{0.5cm} +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__d_paper.jpg} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__d.jpg} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__e_paper.jpg} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__e.jpg} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__f_paper.jpg} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_2__f.jpg} +\end{minipage} + \hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \paper{} +\end{minipage} +\hfill% +\begin{minipage}[t]{0.45\textwidth} + \centering + \implementation{} +\end{minipage} +\caption{Replication of Figure 2 from \cite{GEB+2017}. In the upper row the used content image, the first style image and a second style image for the sky region. The results in the left column are with \paper{} and the right column with \implementation{}. The results are presented using the original method of \cite{GEB2016}, the new approach with a guided variant in which the style transfer from sky to sky and house to house is achieved by masks and a third variant in which a second stylistic image is used for the sky area.} +\label{fig:GEB+2017_fig2} +\end{figure} + +Another approach from the paper is the colour control, the replicated results of this method are shown in \figref{fig:GEB+2017_fig3}. The figure shows that in some cases it may be useful to want to preserve the colour of the original image. In order to achieve this, two different methods have been investigated \cite{GEB+2017}. 
+
+\begin{figure}[H]
+  \centering
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/source/schultenhof_mettingen.jpg}\\
+    Content image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/source/starry_night_over_the_rhone__vincent_willem_van_gogh.jpg}\\
+    Style image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__c_paper.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__c.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__d_paper.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__d.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__e_paper.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/gatys_et_al_2017/results/fig_3__e.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \paper{}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.45\textwidth}
+    \centering
+    \implementation{}
+  \end{minipage}
+  \caption{Replication of Figure 3 in \etal{Gatys} \cite{GEB+2017}. The upper row shows the content image and the style image. The results in the left column were obtained with \paper{} and those in the right column with \implementation{}.}
+  \label{fig:GEB+2017_fig3}
+\end{figure}
+
+\subsection{Johnson et al. 2016}
+The authors of the original publication provide models together with the hyperparameters used during training in their GitHub repository. However, these hyperparameters differ not only from the values given in \paper{} but also from the default values of \implementation{}. They also depend on the style used and on whether instance normalisation was used. The following hyperparameters may be affected:
+
+\begin{itemize}
+  \item the weights for the content loss, the style loss, and the regularization loss,
+  \item the size to which the style image is resized, and
+  \item the number of batches.
+\end{itemize}
+
+An overview of the adapted parameters that we used for the replication with \implementation{} can be found in \tabref{tab:johnson_adapted_hyperparams} in the appendix; the sketch below only illustrates how such a per-style configuration and the weighting of the loss terms fit together.
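+The following sketch is purely illustrative; all values are placeholders and not the parameters that were actually used, which are listed in \tabref{tab:johnson_adapted_hyperparams}.
+
+\begin{verbatim}
+# Illustrative placeholder values only.
+HYPER_PARAMS = {
+    "candy__instance_norm": {
+        "content_weight": 1e0,
+        "style_weight": 5e0,
+        "regularization_weight": 1e-6,
+        "style_image_size": 384,
+        "num_batches": 40000,
+    },
+}
+
+def total_loss(content_loss, style_loss, regularization_loss, params):
+    # Weighted sum of the three loss terms controlled by the hyperparameters.
+    return (
+        params["content_weight"] * content_loss
+        + params["style_weight"] * style_loss
+        + params["regularization_weight"] * regularization_loss
+    )
+\end{verbatim}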
+Some sample results are shown in \figref{fig:johnson_alahi_li_2016_fig}. Since the optimisation starts from randomly initialised transformer weights, we do not expect an exact reproduction of the images, but qualitatively meaningful stylised results. For comparison, the right column in \figref{fig:johnson_alahi_li_2016_fig} shows the results we produced with the models provided by the authors. All of our trained models, as well as scripts to use the pre-trained model weights of the original authors for the stylisation, are available online.
+
+Our results with \paper{} show that no stylised images are generated. The reason is the value range specified in \paper{}, which maps the transformed images into a wrong value range and thus makes training impossible. The results with \implementation{} look different: as the figure shows, our models achieve results similar to those published by the authors. An exception is the style shown in the last row, where the style elements produced by our model appear much smaller. Contacting the author to obtain further information on this style, such as the hyperparameters or additional preprocessing steps for the style image, has not been successful so far. We suspect, however, that this difference is due to a different size of the style image, as the difference is comparable to the results in \figref{fig:diff_size_nst}.
+\begin{figure}[H]
+  \centering
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/chicago.jpg}\\
+    Content image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/johnson_alahi_li_2016/source/mosaic.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__mosaic__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__mosaic__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__mosaic__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/source/starry_night.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__starry_night__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__starry_night__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__starry_night__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=0.6\textwidth]{images/johnson_alahi_li_2016/source/candy.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__candy__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__candy__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__candy__impl_params__instance_norm.jpg}
+  \end{minipage}
+%  \hfill%
+%  \begin{minipage}[t]{0.24\textwidth}
+%    \centering
+%    \includegraphics[width=0.5\textwidth]{images/johnson_alahi_li_2016/source/the_scream.jpg}
+%  \end{minipage}
+%  \hfill%
+%  \begin{minipage}[t]{0.24\textwidth}
+%    \centering
+%    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__the_scream__instance_norm.jpg}
+%  \end{minipage}
+%  \hfill%
+%  \begin{minipage}[t]{0.24\textwidth}
+%    \centering
+%    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__the_scream__impl_params__instance_norm.jpg}
+%  \end{minipage}
+%  \hfill%
+%  \begin{minipage}[t]{0.24\textwidth}
+%    \centering
+%    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__the_scream__impl_params__instance_norm.jpg}
+%  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=0.5\textwidth]{images/johnson_alahi_li_2016/source/feathers.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__feathers__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/chicago__feathers__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/johnson_alahi_li_2016/results/author_models/chicago__feathers__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    Style image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \paper{}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \implementation{}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    Author models
+  \end{minipage}
+  \caption{Stylisation results for the content image produced by the models we trained in different styles with the information from \paper{} and \implementation{}, using the approach of \etal{Johnson} \cite{JAL2016}. The respective style images are shown in the left column. The last column shows the results we created with the models provided by the authors online.}
+  \label{fig:johnson_alahi_li_2016_fig}
+\end{figure}
+
+\subsection{Ulyanov et al. 2016} \label{sec:ulyanov_2016}
+In contrast to the publication of \etal{Johnson}, no models of \etal{Ulyanov} are available online. Furthermore, no detailed information is available on the individual style images used. Discrepancies between the results of \etal{Ulyanov} and the results with \paper{} and \implementation{} are therefore to be expected; for this reason, we evaluate the results for plausibility, and deviations can then be attributed to the missing hyperparameters. In addition, the results are shown here using the content image \emph{Tuebingen Neckarfront} by Andreas Praefcke as an example, since many of the images used in the original paper are no longer freely available.
+
+Our stylisation results for some exemplary style images are shown in \figref{fig:ulyanov_et_al_2016_results}. With \paper{} a stylisation takes place, but the content of the image is lost and the style elements are very small. The reason is that no image size is specified for the stylisation; without this pre-processing of the style image, unusable images are created. In contrast, usable result images can be achieved with \implementation{}, although the style dominates in these images, so the results differ from the original ones. However, they are comparable to the example in \figref{fig:weighting_nst}, and we therefore suspect that, by adjusting the weights, images similar to those in the original publication can be created with our replication. The sketch below illustrates the missing pre-processing step.
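+The following sketch is not taken from \implementation{} and only illustrates the kind of pre-processing that is missing with \paper{}: the style image is resized to a fixed edge length before the style targets are computed, since the scale of the stylistic elements in the output depends on the resolution at which the style image is presented to the encoder. The edge length used here is a placeholder.
+
+\begin{verbatim}
+from torchvision.transforms.functional import resize
+
+def preprocess_style_image(style_image, edge_size=256):
+    # Resize the style image so that its shorter edge has a fixed length.
+    height, width = style_image.shape[-2:]
+    scale = edge_size / min(height, width)
+    new_size = [round(height * scale), round(width * scale)]
+    return resize(style_image, new_size)
+\end{verbatim}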
+
+\begin{figure}[H]
+  \centering
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}\\
+    Content image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/candy.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__candy__neckarfront.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__candy__neckarfront__impl_params.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/ulyanov_et_al_2016/source/jean_metzinger.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__jean_metzinger__neckarfront.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__jean_metzinger__neckarfront__impl_params.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/mosaic.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__mosaic__neckarfront.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__mosaic__neckarfront__impl_params.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/pleades.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__pleades__neckarfront.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__pleades__neckarfront__impl_params.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    Style image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \paper{}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \implementation{}
+  \end{minipage}
+  \caption{Stylisation results for the content image produced by the models we trained in different styles with the information from \paper{} and \implementation{}, using the approach of \etal{Ulyanov} \cite{ULVL2016}. The respective style images are shown in the left column.}
+  \label{fig:ulyanov_et_al_2016_results}
+\end{figure}
+
+\subsection{Ulyanov et al. 2017}
+The authors published an improved version of their initial paper \cite{ULVL2016} with one significant change: they developed \emph{InstanceNorm} and used it as a drop-in replacement for the \emph{BatchNorm} layers. Unfortunately, no models are available for this approach either. In addition, the parameters deviate from those of the initial work, so this replication also differs from the original work. A minimal sketch of the layer swap is given after \figref{fig:ulyanov_et_al_2017_results}.
+
+Our stylisation results for some exemplary style images are shown in \figref{fig:ulyanov_et_al_2017_results}. With both \paper{} and \implementation{} the training of the models is unstable: in addition to unusable black result images, there are also images without any stylisation. Furthermore, the same effect as in \secref{sec:ulyanov_2016} can be seen: the images with \paper{} were stylised without pre-processing, so the style representations are clearly smaller and the content of the content image is not recognisable.
+\begin{figure}[H]
+  \centering
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/tuebingen_neckarfront__andreas_praefcke.jpg}\\
+    Content image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \hfill
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/candy.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__candy__neckarfront__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__candy__neckarfront__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=0.8\textwidth]{images/ulyanov_et_al_2016/source/jean_metzinger.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__jean_metzinger__neckarfront__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__jean_metzinger__neckarfront__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/mosaic.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__mosaic__neckarfront__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__mosaic__neckarfront__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/source/pleades.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__pleades__neckarfront__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{images/ulyanov_et_al_2016/results/ulyanov_et_al_2016__pleades__neckarfront__impl_params__instance_norm.jpg}
+  \end{minipage}
+  \hfill%
+  \\
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    Style image
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \paper{}
+  \end{minipage}
+  \hfill%
+  \begin{minipage}[t]{0.24\textwidth}
+    \centering
+    \implementation{}
+  \end{minipage}
+  \caption{Stylisation results for the content image produced by the models we trained in different styles with the information from \paper{} and \implementation{}, using the approach of \etal{Ulyanov} \cite{UVL2017}. The respective style images are shown in the left column.}
+  \label{fig:ulyanov_et_al_2017_results}
+\end{figure}
+
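+As noted above, the significant change in \cite{UVL2017} is the replacement of the batch normalisation layers by instance normalisation. In PyTorch this amounts to swapping the layer class inside the transformer network; the following sketch, which is not taken from \implementation{}, illustrates the idea.
+
+\begin{verbatim}
+from torch import nn
+
+def norm_layer(num_channels, instance_norm=True):
+    # Instance normalisation as a drop-in replacement for batch
+    # normalisation inside the transformer network.
+    if instance_norm:
+        return nn.InstanceNorm2d(num_channels, affine=True)
+    return nn.BatchNorm2d(num_channels)
+\end{verbatim}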