CHAPTER PRIMARY BIBLIOGRAPHY

SECT Peter Wayner

@Comment{Layout: one field per line. The CHAPTER and SECT lines are free text outside any entry and are kept verbatim. Months use the standard three-letter BibTeX macros.}

@Article{Wayner1992,
  author       = {Peter Wayner},
  title        = {Mimic Functions},
  journal      = {Cryptologia},
  month        = jul,
  year         = {1992},
  volume       = {XVI},
  number       = {3},
  pages        = {193--214},
  keywords     = {compression; subliminal channels; context-free grammar},
  abstract     = {A mimic function changes a file $A$ so it assumes the statistical properties of another file $B$. That is, if $p(t,A)$ is the probability of some substring $t$ occurring in $A$, then a mimic function $f$, recodes $A$ so that $p(t,f(A))$ approximates $p(t,B)$ for all strings $t$ of length less than some $n$. This paper describes the algorithm with its functional inverse, Huffman coding. The paper also provides a description of more robust and more general mimic functions which can be defined using context-free grammars and van Wijngaarden grammars.}
}

@Article{Wayner1995,
  author       = {Peter Wayner},
  title        = {Strong Theoretical Steganography},
  journal      = {Cryptologia},
  month        = jul,
  year         = {1995},
  volume       = {XIX},
  number       = {3},
  pages        = {285--299},
  CODEN        = {CRYPE6},
  ISSN         = {0161-1194},
  keywords     = {Mimic function; natural language processing; RSA},
  abstract     = {Hiding the existence of a message can be an important technique in this era of terabit networks. One technique for practicing this obfuscation, Mimic Functions, is derived from Context-Free Grammars and can be as secure as inverting RSA or factoring Blum integers. This paper discusses the implications of the result and presents a practical solution for securely hiding information from inspection.}
}

SECT Purdue CERIAS

@InProceedings{Atallah2001,
  author       = {Mikhail J. Atallah and Victor Raskin and Michael Crogan and Christian Hempelmann and Florian Kerschbaum and Dina Mohamed and Sanket Naik},
  institution  = {Purdue CERIAS},
  title        = {Natural Language Watermarking: Design, Analysis, and a Proof-of-Concept Implementation},
  booktitle    = {Information Hiding: Fourth International Workshop},
  editor       = {Ira S. Moskowitz},
  location     = {Pittsburgh, PA, USA},
  month        = apr,
  year         = {2001},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  volume       = {2137},
  ISBN         = {3-540-42733-3},
  pages        = {185--199},
  ISSN         = {0302-9743},
  URL          = {http://omni.cc.purdue.edu/~vraskin/IHW.AtaRasEtAl.pdf},
  abstract     = {We describe a scheme for watermarking natural language text. Let $n$ denote the total number of sentences of a text, $\alpha$ denote the number of sentences that carry watermark bits. The modifications that an adversary can perform (for the purpose of removing the watermark) are as follows: (i) Meaning-preserving transformations of sentences of the text (e.g. translation to another natural language). This cannot damage the watermark. (ii) Meaning-modifying transformations of sentences of the text. Each such transformation has probability $\leq 3 \alpha / n$ of damaging the watermark. (iii) Insertions of new sentences in the text. Each such insertion has probability $\leq 2 \alpha / n$ of damaging the watermark. (iv) Moving a contiguous block of sentences from one place of the text to another. Each block-motion has probability $\leq 3 \alpha / n$ of damaging the watermark. Our scheme is keyed, and having the key is all that is required for reading the watermark; it does not require knowledge of the original (pre-watermark) version of the text, or knowledge of the watermark message. The probability of a ``false positive'', i.e. that the text spuriously contains any particular $w$-bit watermark, is $2^{-w}$.}
}

@InProceedings{Atallah2002,
  author       = {Mikhail J. Atallah and Victor Raskin and Christian F. Hempelmann and Mercan Karahan and Radu Sion and Umut Topkara and Katrina E. Triezenberg},
  institution  = {Purdue CERIAS},
  title        = {Natural Language Watermarking and Tamperproofing},
  booktitle    = {Information Hiding: Fifth International Workshop},
  editor       = {Fabien A. P. Petitcolas},
  location     = {Noordwijkerhout, Netherlands},
  month        = oct,
  year         = {2002},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  volume       = {2578},
  pages        = {196--212},
  ISSN         = {0302-9743},
  URL          = {http://omni.cc.purdue.edu/~vraskin/IHW-2002.pdf},
  abstract     = {Two main results in the area of information hiding in natural language text are presented. A semantically-based scheme dramatically improves the information hiding capacity of any text through two techniques: (i) modifying the granularity of meaning of individual sentences, whereas our own previous scheme kept the granularity fixed, and (ii) halving the number of sentences affected by the watermark. No longer a ``long text, short watermark'' approach, it now makes it possible to watermark short texts like wire agency reports. Using both the above-mentioned semantic marking scheme and our previous syntactically-based method hides information in a way that reveals any non-trivial tampering with the text (while re-formatting is not considered to be tampering -- the problem would be solved trivially otherwise by hiding a hash of the text) with a probability $1 - 2^{-\beta (n + 1)}$, $n$ being its number of sentences and $\beta$ a small positive integer based on the extent of co-referencing.}
}

@Comment{Bennett2004: the URL field holds two comma-separated URLs, following the same convention used elsewhere in this file.}

@TechReport{Bennett2004,
  author       = {Krista Bennett},
  institution  = {Purdue CERIAS},
  title        = {Linguistic Steganography: Survey, Analysis, and Robustness Concerns for Hiding Information in Text},
  number       = {TR 2004-13},
  month        = may,
  year         = {2004},
  URL          = {https://www.cerias.purdue.edu/tools_and_resources/bibtex_archive/archive/2004-13.pdf, http://omni.cc.purdue.edu/~vraskin/Semantic-Mimicking.pdf},
  abstract     = {Steganography is an ancient art. With the advent of computers, we have vast accessible bodies of data in which to hide information, and increasingly sophisticated techniques with which to analyze and recover that information. While much of the recent research in steganography has been centered on hiding data in images, many of the solutions that work for images are more complicated when applied to natural language text as a cover medium. Many approaches to steganalysis attempt to detect statistical anomalies in cover data which predict the presence of hidden information. Natural language cover texts must not only pass the statistical muster of automatic analysis, but also the minds of human readers. Linguistically naive approaches to the problem use statistical frequency of letter combinations or random dictionary words to encode information. More sophisticated approaches use context-free grammars to generate syntactically correct cover text which mimics the syntax of natural text. None of these uses meaning as a basis for generation, and little attention is paid to the semantic cohesiveness of a whole text as a data point for statistical attack. This paper provides a basic introduction to steganography and steganalysis, with a particular focus on text steganography. Text-based information hiding techniques are discussed, providing motivation for moving toward linguistic steganography and steganalysis. We highlight some of the problems inherent in text steganography as well as issues with existing solutions, and describe linguistic problems with character-based, lexical, and syntactic approaches. Finally, the paper explores how a semantic and rhetorical generation approach suggests solutions for creating more believable cover texts, presenting some current and future issues in analysis and generation. The paper is intended to be both general enough that linguists without training in information security and computer science can understand the material, and specific enough that the linguistic and computational problems are described in adequate detail to justify the conclusions suggested.}
}

@InProceedings{Topkara2005,
  author       = {Mercan Topkara and Cuneyt M. Taskiran and Edward J. Delp},
  institution  = {Purdue CERIAS},
  title        = {Natural Language Watermarking},
  booktitle    = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  editor       = {Edward J. Delp and Ping W. Wong},
  location     = {San Jose, CA, USA},
  month        = jan,
  year         = {2005},
  volume       = {5681},
  URL          = {http://www.cs.purdue.edu/homes/mkarahan/ei05_5681_45.pdf},
  keywords     = {text watermarking, natural language processing, text steganography},
  abstract     = {In this paper we discuss natural language watermarking, which uses the structure of the sentence constituents in natural language text in order to insert a watermark. This approach is different from techniques, collectively referred to as text watermarking, which embed information by modifying the appearance of text elements, such as lines, words, or characters. We provide a survey of the current state of the art in natural language watermarking and introduce terminology, techniques, and tools for text processing. We also examine the parallels and differences of the two watermarking domains and outline how techniques from the image watermarking domain may be applicable to the natural language watermarking domain.}
}

@InProceedings{Taskiran2006,
  author       = {Cuneyt M. Taskiran and Umut Topkara and Mercan Topkara and Edward J. Delp},
  title        = {Attacks on Lexical Natural Language Steganography Systems},
  booktitle    = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  year         = {2006},
  month        = jan,
  location     = {San Jose, CA},
  URL          = {http://homes.cerias.purdue.edu/~mercan/spie06_6072-9_paper.pdf},
  abstract     = {Text data forms the largest bulk of digital data that people encounter and exchange daily. For this reason the potential usage of text data as a covert channel for secret communication is an imminent concern. Even though information hiding into natural language text has started to attract great interest, there has been no study on attacks against these applications. In this paper we examine the robustness of lexical steganography systems. In this paper we used a universal steganalysis method based on language models and support vector machines to differentiate sentences modified by a lexical steganography algorithm from unmodified sentences. The experimental accuracy of our method on classification of steganographically modified sentences was 84.9 percent. On classification of isolated sentences we obtained a high recall rate whereas the precision was low.}
}

@InProceedings{Topkara2006,
  author       = {Mercan Topkara and Giuseppe Riccardi and Dilek Hakkani-Tur and Mikhail J. Atallah},
  title        = {Natural Language Watermarking: Challenges in Building a Practical System},
  booktitle    = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  year         = {2006},
  month        = jan,
  location     = {San Jose, CA},
  URL          = {http://homes.cerias.purdue.edu/~mercan/spie06_6072-10_paper.pdf},
  abstract     = {This paper gives an overview of the research and implementation challenges we encountered in building an end-to-end natural language processing based watermarking system. With natural language watermarking, we mean embedding the watermark into a text document, using the natural language components as the carrier, in such a way that the modifications are imperceptible to the readers and the embedded information is robust against possible attacks. Of particular interest is using the structure of the sentences in natural language text in order to insert the watermark. We evaluated the quality of the watermarked text using an objective evaluation metric, the BLEU score. BLEU scoring is commonly used in the statistical machine translation community. Our current system prototype achieves 0.45 BLEU score on a scale [0,1].}
}

@InProceedings{Topkara2006a,
  author       = {Umut Topkara and Mercan Topkara and Mikhail J. Atallah},
  title        = {The hiding virtues of ambiguity: quantifiably resilient watermarking of natural language text through synonym substitutions},
  booktitle    = {MM\&Sec '06: Proceeding of the 8th workshop on Multimedia and security},
  year         = {2006},
  ISBN         = {1-59593-493-6},
  pages        = {164--174},
  location     = {Geneva, Switzerland},
  DOI          = {10.1145/1161366.1161397},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
  abstract     = {Information-hiding in natural language text has mainly consisted of carrying out approximately meaning-preserving modifications on the given cover text until it encodes the intended mark. A major technique for doing so has been synonym-substitution. In these previous schemes, synonym substitutions were done until the text ``confessed'', i.e., carried the intended mark message. We propose here a better way to use synonym substitution, one that is no longer entirely guided by the mark-insertion process: It is also guided by a resilience requirement, subject to a maximum allowed distortion constraint. Previous schemes for information hiding in natural language text did not use numeric quantification of the distortions introduced by transformations, they mainly used heuristic measures of quality based on conformity to a language model (and not in reference to the original cover text). When there are many alternatives to carry out a substitution on a word, we prioritize these alternatives according to a quantitative resilience criterion and use them in that order. In a nutshell, we favor the more ambiguous alternatives. In fact not only do we attempt to achieve the maximum ambiguity, but we want to simultaneously be as close as possible to the above-mentioned distortion limit, as that prevents the adversary from doing further transformations without exceeding the damage threshold; that is, we continue to modify the document even after the text has ``confessed'' to the mark, for the dual purpose of maximizing ambiguity while deliberately getting as close as possible to the distortion limit. The quantification we use makes possible an application of the existing information-theoretic framework, to the natural language domain, which has unique challenges not present in the image or audio domains. The resilience stems from both (i) the fact that the adversary does not know where the changes were made, and (ii) the fact that automated disambiguation is a major difficulty faced by any natural language processing system (what is bad news for the natural language processing area, is good news for our scheme's resilience). In addition to the above mentioned design and analysis, another contribution of this paper is the description of the implementation of the scheme and of the experimental data obtained.}
}

@InProceedings{Topkara2006b,
  author       = {Mercan Topkara and Umut Topkara and Mikhail J. Atallah},
  title        = {Words Are Not Enough: Sentence Level Natural Language Watermarking},
  booktitle    = {Proceedings of the ACM Workshop on Content Protection and Security (in conjunction with ACM Multimedia)},
  year         = {2006},
  month        = oct,
  location     = {Santa Barbara, CA}
}

@InProceedings{Topkara2007,
  author       = {Mercan Topkara and Umut Topkara and Mikhail J. Atallah},
  title        = {Information hiding through errors: A confusing approach},
  booktitle    = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  year         = {2007},
  month        = jan,
  location     = {San Jose, CA}
}

SECT Grothoff et al.
@TechReport{Grothoff2005, author = {Christian Grothoff and Krista Grothoff and Ludmila Alkhutova and Ryan Stutsman and Mikhail Atallah}, title = {Translation-Based Steganography}, year = {2005}, URL = {https://www.cerias.purdue.edu/tools_and_resources/bibtex_archive/ archive/2005-39.pdf}, institution = {Purdue CERIAS}, number = {TR 2005-39}, abstract = {This paper investigates the possibilities of steganographically embedding information in the ``noise'' created by automatic translation of natural language documents. Because the inherent redundancy of natural language creates plenty of room for variation in translation, machine translation is ideal for steganographic applications. Also, because there are frequent errors in legitimate automatic text translations, additional errors inserted by an information hiding mechanism are plausibly undetectable and would appear to be part of the normal noise associated with translation. Significantly, it should be extremely difficult for an adversary to determine if inaccuracies in the translation are caused by the use of steganography or by deficiencies of the translation software.} } @InProceedings{Grothoff2005a, author = {Christian Grothoff and Krista Grothoff and Ludmila Alkhutova and Ryan Stutsman and Mikhail Atallah}, title = {Translation-Based Steganography}, booktitle = {Proceedings of Information Hiding Workshop (IH 2005)}, publisher = {Springer}, year = {2005}, pages = "213--233", URL = {http://grothoff.org/christian/stego.pdf}, abstract = {This paper investigates the possibilities of steganographically embedding information in the ``noise'' created by automatic translation of natural language documents. Because the inherent redundancy of natural language creates plenty of room for variation in translation, machine translation is ideal for steganographic applications. 
Also, because there are frequent errors in legitimate automatic text translations, additional errors inserted by an information hiding mechanism are plausibly undetectable and would appear to be part of the normal noise associated with translation. Significantly, it should be extremely difficult for an adversary to determine if inaccuracies in the translation are caused by the use of steganography or by deficiencies of the translation software.} } @InProceedings{Strutsman2006, author = {Ryan Stutsman and Mikhail Atallah and Christian Grothoff and Krista Grothoff}, title = {Lost in Just the Translation}, booktitle = {Proceedings of the 21st Annual ACM Symposium on Applied Computing (SAC 2006)}, year = {2006}, month = {April}, URL = {http://grothoff.org/christian/lijtt.pdf}, abstract = {This paper describes the design and implementation of a scheme for hiding information in translated natural language text, and presents experimental results using the implemented system. Unlike the previous work, which required the presence of both the source and the translation, the protocol presented in this paper requires only the translated text for recovering the hidden message. This is a significant improvement, as transmitting the source text was both wasteful of resources and less secure. The security of the system is now improved not only because the source text is no longer available to the adversary, but also because a broader repertoire of defenses (such as mixing human and machine translation) can now be used.} } SECT Chapman et al. @MastersThesis{Chapman1997, author = {Mark T. 
Chapman}, title = {Hiding The Hidden: {A} Software System for Concealing Ciphertext as Innocuous Text}, school = {University of Wisconsin-Milwaukee}, month = {May}, year = {1997}, pages = {viii + 74}, URL = {http://www.nicetext.com/doc/thesis.ps}, keywords = {ciphertext, privacy, information hiding}, abstract = {In this thesis we present a system for protecting the privacy of cryptograms to avoid detection by censors. The system transforms ciphertext into innocuous text which is transformed back into the original ciphertext. The expandable set of tools allows experimentation with custom dictionaries, automatic simulation of writing style, and the use of Context-Free Grammars to control text generation.} } @InProceedings{Chapman1997a, author = {Mark T. Chapman and George I. Davida}, title = {Hiding the Hidden: {A} Software System for Concealing Ciphertext as Innocuous Text}, booktitle = {Information and Communications Security: First International Conference}, editor = {Yongfei Han Tatsuaki, Okamoto Sihan Qing}, location = {Beijing, China}, month = {August}, year = {1997}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {1334}, ISBN = {3-540-63696-X}, URL = {http://www.nicetext.com/doc/icics97.ps}, abstract = {In this paper we present a system for protecting the privacy of cryptograms to avoid detection by censors. The system transforms ciphertext into innocuous text which can be transformed back into the original ciphertext. The expandable set of tools allows experimentation with custom dictionaries, automatic simulation of writing style, and the use of Context-Free Grammars to control text generation.} } @InProceedings{Chapman2001, author = {Mark T. Chapman and George I. Davida and Marc Rennhard}, title = {A Practical and Effective Approach to Large-Scale Automated Linguistic Steganography}, booktitle = {Information Security: Fourth International Conference}, editor = {George I. 
Davida and Yair Frankel}, location = {Malaga, Spain}, month = {October}, year = {2001}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {2200}, pages = {156ff}, ISSN = {0302-9743}, URL = {http://www.nicetext.com/doc/isc01.ps, http://www.tik.ee.ethz.ch/~rennhard/publications/als.pdf}, abstract = {Several automated techniques exist to transform ciphertext into text that looks like natural-language text while retaining the ability to recover the original ciphertext. This transformation changes the ciphertext so that it doesn't attract undue attention from, for example, attackers or agencies or organizations that might want to detect or censor encrypted communication. Although it is relatively easy to generate a small sample of quality text, it is challenging to be able to generate large texts that are meaningful to a human reader and which appear innocuous. This paper expands on a previous approach that used sentence models and large dictionaries of words classified by part-of-speech. By using an extensible contextual template approach combined with a synonym-based replacement strategy, much more realistic text is generated than was possible with NICETEXT.} } @InProceedings{Chapman2002, author = {Mark T. Chapman and George I. Davida}, title = {Plausible Deniability Using Automated Linguistic Steganography}, booktitle = {Infrastructure Security: International Conference}, editor = {George I. Davida and Yair Frankel}, location = {Bristol, UK}, month = {October}, year = {2002}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {2437}, pages = {276--287}, ISSN = {0302-9743}, URL = {http://www.nicetext.com/doc/infrasec02.ps}, abstract = {Information hiding has several applications, one of which is to hide the use of cryptography. The Nicetext system introduced a method for hiding cryptographic information by converting cryptographic strings (random-looking) into nice text (namely innocuous looking). 
The system retains the ability to recover the original ciphertext from the generated text. Nicetext can hide both plaintext and cryptographic text. The purpose of such transformations are to mask ciphertext from anyone who wants to detect or censor encrypted communication, such as a corporation that may monitor, or censor, its employee private mail. Even if the message is identified as the output of Nicetext, the sender might claim that the input was simply a pseudo-random number source rather than ciphertext. This paper extends the Nicetext protocol to enable deniable cryptography/messaging using the concepts of plausible deniability [1]. Deniability is derived from the fact that even if one is forced to reveal a key to the random string that nice text reverts to, the real cryptographic/plaintext messages may be stored within additional required sources of randomness in the extended protocol.} } SECT Richard Bergmair @Misc{Bergmair2004, author = {Richard Bergmair}, title = {Towards Linguistic Steganography: A Systematic Investigation of Approaches, Systems, and Issues}, howpublished = {final year thesis}, month = {April}, year = {2004}, note = {handed in in partial fulfillment of the degree requirements for the degree {``B.Sc. (Hons.) in Computer Studies''} to the University of Derby.}, URL = {http://bergmair.cjb.net/pub/towlingsteg-rep-inoff-a4.ps.gz}, abstract = {Steganographic systems provide a secure medium to covertly transmit information in the presence of an arbitrator. In linguistic steganography, in particular, machine-readable data is to be encoded to innocuous natural language text, thereby providing security against any arbitrator tolerating natural language as a communication medium. So far, there has been no systematic literature available on this topic, a gap the present report attempts to fill. This report presents necessary background information from steganography and from natural language processing. 
A detailed description is given of the systems built so far. The ideas and approaches they are based on are systematically presented. Objectives for the functionality of natural language stegosystems are proposed and design considerations for their construction and evaluation are given. Based on these principles current systems are compared and evaluated. A coding scheme that provides for some degree of security and robustness is described and approaches towards generating steganograms that are more adequate, from a linguistic point of view, than any of the systems built so far, are outlined.} } @Misc{Bergmair2004a, author = {Richard Bergmair}, title = {Natural Language Steganography and an ``AI-complete'' Security Primitive}, howpublished = {talk}, month = {December}, year = {2004}, location = {Berlin}, note = {talk held at the 21st Chaos Communication Congress}, abstract = { It is out of question, that we will have a long way to go, until we can encode our favourite MP3-files to t-shirt slogans, and distribute them by wearing them on the streets with the music industry unable to prove that something like an information exchange is taking place, but hopefully this article shows why research in natural language steganography is worth the effort. Some major ideas from steganography and computational linguistics are introduced and it is shown how they can be drawn together for security purposes. We present our technique of content-aware linguistic steganography, which is based on the general idea of using ``AI-complete'' problems as security primitives, and hope to inspire the hacker-community to come up with new creative security technologies.} } SECT Bolshakov et al. @InProceedings{Bolshakov2004, author = {Igor A. 
Bolshakov}, institution = {Center for Computing Research, National Polytechnic Institute, Mexico City, Mexico}, title = {A Method of Linguistic Steganography Based on Collocationally-Verified Synonymy.}, booktitle = {Information Hiding: 6th International Workshop}, editor = {Jessica J. Fridrich}, location = {Toronto, Canada}, month = {May}, year = {2004}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {3200}, ISBN = {3-540-24207-4}, DOI = {10.1007/b104759}, pages = {180--191}, abstract = {A method is proposed of the automatic concealment of digital information in rather long orthographically and semantically correct texts. The method does not change the meaning of the source text; it only replaces some words by their synonyms. Groups of absolute synonyms are used in a context independent manner, while the groups of relative synonyms are previously tested for semantic compatibility with the collocations containing the word to be replaced. A specific replacement is determined by the hidden information. The collocations are syntactically connected and semantically compatible pairs of content words; they are massively gathered beforehand, with a wide diversity in their stability and idiomacity. Thus the necessary linguistic resources are a specific synonymy dictionary and a very large database of collocations. The steganographic algorithm is informally outlined. An example of hiding binary information in a Russian text fragment is manually traced, with a rough evaluation of the steganographic bandwidth.} } @InProceedings{Calvo2004, author = {Hiram Calvo and Igor A. Bolshakov}, institution = {Center for Computing Research, National Polytechnic Institute, Mexico City, Mexico}, title = {Using Selectional Preferences for Extending a Synonymous Paraphrasing Method in Steganography}, booktitle = {Avances en Ciencias de la Computacion e Ingenieria de Computo - CIC'2004: XIII Congreso Internacional de Computacion}, editor = {J. H. 
Sossa Azuela}, location = {Mexico City, Mexico}, month = {October}, year = {2004}, pages = {231--242}, abstract = {Linguistic steganography allows hiding information in a text. The resulting text must be grammatically correct and semantically coherent to be unsuspicious. Among several methods of linguistic steganography we adhere to previous approaches which use synonymous paraphrasing, i.e., substituting content words by their equivalents. Context must be considered to avoid possible substitutions that break coherence (for example spicy dog instead of hot dog). We base our method on previous work in linguistic steganography that uses collocations for verifying context. We propose using selectional preferences instead of collocations because selectional preferences can be collected automatically from large corpora in a reliable manner, thus allowing our method to be applied for any language. The steganographic algorithm is informally outlined and an example of hiding binary information in a Spanish text fragment is presented, with a rough evaluation of the ratio of hidden information size to the necessary size of the original text.} } SECT Murphy et al. @MastersThesis{Murphy2001, author = {B. Murphy}, title = {Syntactic Information Hiding in Plain Text}, school = {Department of Computer Science, Trinity College Dublin}, year = {2001} } @InProceedings{Murphy2007, author = {B. Murphy and C. Vogel}, title = {The syntax of concealment: reliable methods for plain text information hiding}, booktitle = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents}, year = {2007}, month = {January}, location = {San Jose, CA} } @InProceedings{Murphy2007a, author = {B. Murphy and C. 
Vogel}, title = {Statistically constrained shallow text marking: techniques, evaluation paradigm, and results}, booktitle = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents}, year = {2007}, month = {January}, location = {San Jose, CA} } SECT Meral et al. @inproceedings{Meral2006, author = {H. M. Meral and B. Sankur and A. S. Ozsoy}, title = {Watermarking Tools for Turkish Texts}, booktitle = {Proceedings of the 14th IEEE Conference on Signal Processing and Communications Applications}, year = {2006}, month = {April}, pages = {1--4}, DOI = {10.1109/SIU.2006.1659827}, publisher = {IEEE}, abstract = {Text watermarking is a recent subject of natural language processing aimed to the content security and authentication information of the text documents. This study explores possible text watermarking tools for Turkish language. Various watermarking tools such as changes of morphological and syntactic structures, and swapping of synonyms and punctuations are investigated and their relative performance measured.} } @InProceedings{Meral2007, author = {H. M. Meral and B. Sankur and S. Ozsoy}, title = {Syntactic tools for natural language watermarking}, booktitle = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents}, year = {2007}, month = {January}, location = {San Jose, CA} } SECT Others @Misc{Winstein1999, author = {Keith Winstein}, title = {Lexical Steganography Through Adaptive Modulation of the Word Choice Hash}, month = {January}, year = {1999}, URL = {http://alumni.imsa.edu/~keithw/tlex/lsteg.ps}, note = {Was disseminated during secondary education at the Illinois Mathematics and Science Academy. The paper won the third prize in the 2000 Intel Science Talent Search.}, abstract = {Steganography provides for the embedding of information in a block of host data in conditions where perceptible modification of the host data is intolerable. 
Steganographic techniques are highly dependent on the character of the host data; a technique for embedding information in images might make subtle changes in hue, while a method for embedding information in audio data could exploit the limitations of the human ear by encoding the encapsulated information in inaudible frequency ranges. Current implementations of textual steganography exploit tolerances in typesetting by making minute changes in line placement and kerning in order to encapsulate hidden information, making them vulnerable to simple retypesetting attacks. This paper defines a framework for lexical steganography and discusses the details of an implementation.}
}

@Article{Nakagawa2001,
  author = {Hiroshi Nakagawa and Kouji Sampei and Tsutomu Matsumoto and Shuji Kawaguchi and Kyoto Makino and Ichiro Murase},
  institution = {Information Technology Center, University of Tokyo, Japan},
  title = {Text information hiding with preserved meaning -- A case for Japanese documents},
  journal = {IPSJ Transaction},
  volume = {42},
  number = {9},
  pages = {2339--2350},
  year = {2001},
  abstract = {Digital fingerprinting is being paid growing attention as a technology resolving copyright problems. Previously, researchers have only been interested in image based digital fingerprinting where secret information is hidden in images, as opposed to our the method we will put forward herein, which uses text. It is based on a paraphrasing method that is supposed to preserve meaning of the original contents. We experimentally evaluated the proposed method with Japanese manuals and user agreement forms of software, and found the paraphrased text is preserving the meaning of the original contents and closely mimics natural language.},
  note = {originally published in Japanese.
A similar paper was disseminated by the first author in English and is kept available for download from \url{http://www.r.dl.itc.u-tokyo.ac.jp/~nakagawa/academic-res/finpri02.pdf}}
}

@Misc{Tenenbaum2002,
  author = {Adam J. Tenenbaum},
  title = {Linguistic Steganography: Passing Covert Data Using Text-Based Mimicry},
  howpublished = {final year thesis},
  month = {April},
  year = {2002},
  note = {submitted in partial fulfillment of the requirements for the degree of {``Bachelor of Applied Science''} to the University of Toronto.},
  URL = {http://www.comm.utoronto.ca/~adam/downloads/AJTBAScThesis.pdf},
  abstract = {The goal of linguistic steganography systems is to transmit a secret message over an open communication channel while concealing the presence of the secret message altogether. The secret message is hidden by encoding its bits within a ``cover'' message that mimics natural language. Existing text mimicry algorithms are flawed in that there exists a tradeoff between the quality of the output text and the resources required to manually design an appropriate grammar for the content of the cover message. In Peter Wayner's basic mimicry algorithm, the system learns from frequency analysis of a ``training source'' in order to attempt to mimic the source. This thesis improves upon Wayner's algorithm by changing the ``atom'' in frequency analysis from a single character to a single word.
The resulting linguistic steganography algorithm generates a cover text that more closely resembles the style of the training source but also mimics the grammar of the source text in a dynamic, automated fashion.}
}

@Article{Niimi2003,
  author = {Michiharu Niimi and Sayaka Minewaki and Hideki Noda and Eiji Kawaguchi},
  institution = {Kyushu Institute of Technology, Kitakyushu, Japan},
  title = {A Framework of Text-based Steganography Using SD-Form Semantics Model},
  journal = {IPSJ Journal},
  volume = {44},
  number = {8},
  month = {August},
  year = {2003},
  URL = {http://www.know.comp.kyutech.ac.jp/STEG03/STEG03-PAPERS/papers/12-Niimi.pdf},
  abstract = {This paper describes a framework of text-base steganography in consideration of the meaning of natural language sentences. To deal with the meaning of sentences, this method uses SD-Form Semantics Model that has been developed by the authors. In the model, sentences are described by the form named SD-Form. An SD-Form is assigned an amount of semantic information. The amount of the meaning of sentences is used to carry secret information on text data. In embedding secret information, sentences are transformed to SD-Forms and then the amount of semantic information of SD-Forms is decreased or increased to coincide with the value of the secret information. We show methods to decrease or increase the amount of the meaning of SD-Forms.}
}

@InProceedings{Chiang2003,
  author = {Yuei-Lin Chiang and Lu-Ping Chang and Wen-Tai Hsieh and Wen-Chih Chen},
  institution = {Advanced e-Commerce Technology Laboratory, Institute for Information Industry, Taipei, Taiwan},
  title = {Natural Language Watermarking Using Semantic Substitution for Chinese Text},
  booktitle = {Digital Watermarking: Second International Workshop, IWDW 2003},
  editor = {Ton Kalker and Ingemar J.
Cox and Yong Man Ro},
  location = {Seoul, Korea},
  month = {October},
  year = {2003},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  volume = {2939},
  pages = {129--140},
  ISBN = {3-540-21061-X},
  DOI = {10.1007/b95658},
  abstract = {Numerous schemes have been designed for watermarking multimedia contents. Many of these schemes are vulnerable to watermark erasing attacks. Naturally, such methods are ineffective on text unless the text is represented as a bitmap image, but in that case, the watermark can be erased easily by using Optical Character Recognition (OCR) to change the representation of the text from a bitmap to ASCII or EBCDIC. This study attempts to develop a method for embedding watermark in the text that is as successful as the frequency-domain methods have been for image and audio. The novel method embeds the watermark in original text, creating ciphertext, which preserves the meaning of the original text via various semantic replacements.}
}

@InProceedings{Sun2004,
  author = {Xingming Sun and Gang Luo and Huajun Huang},
  title = {Component-based digital watermarking of Chinese texts},
  booktitle = {InfoSecu '04: Proceedings of the 3rd international conference on Information security},
  year = {2004},
  ISBN = {1-58113-955-1},
  pages = {76--81},
  location = {Shanghai, China},
  DOI = {10.1145/1046290.1046306},
  publisher = {ACM Press},
  abstract = {According to the types of the host media, digital watermarking may be classified mainly as image watermarking, video watermarking, audio watermarking, and text watermarking. The principle of the three watermarking research fields are similar in that they make use of the redundant information of their host media and the characteristics of human video system or human audio system. Unfortunately, text has no redundant information. Text watermarking techniques are totally different from them.
And text watermarking algorithm is very difficult to satisfy the requirements of transparence and robustness. In this paper, a novel text watermarking algorithm based on the thought of the mathematical expression will be presented. Since watermarking signals are embedded into some Chinese characters that can be divided into left and right components, this algorithm is totally based on the content. Therefore, it breaks through the difficulties of text watermarking. Experiments also show that the component-based text watermarking technique is relatively robust and transparent. It will play an important role in protecting the security of Chinese documents over Internet.}
}

@InProceedings{Uzuner2006,
  author = {Ozlem Uzuner},
  title = {Natural language processing with linguistic information for digital fingerprinting and watermarking},
  booktitle = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  year = {2006},
  month = {January},
  location = {San Jose, CA}
}

@InProceedings{Chand2006,
  author = {V. Chand and C. O. Orgun},
  title = {Exploiting Linguistic Features in Lexical Steganography: Design and Proof-of-Concept Implementation},
  booktitle = {Proceedings of the 39th Annual Hawaii International Conference on System Sciences (HICSS '06)},
  year = {2006},
  month = {January},
  ISBN = {0-7695-2507-5},
  ISSN = {1530-1605},
  DOI = {10.1109/HICSS.2006.175},
  volume = {6},
  pages = {126b},
  location = {Hawaii},
  publisher = {IEEE},
  abstract = {This paper develops a linguistically robust encryption, LUNABEL, which converts a message into semantically innocuous text. Drawing upon linguistic criteria, LUNABEL uses word replacement, with substitution classes based on traditional word replacement features (syntactic categories and sub-categories), as well as features under-exploited in earlier works: semantic criteria, graphotactic structure, inflectional class and frequency statistics.
The original message is further hidden through the use of cover texts --- within these, LUNABEL retains all function words and targets specific classes of content words for replacement, creating text which preserves the syntactic structure and semantic context of the original cover text. LUNABEL takes advantage of cover text styles which are not expected to be necessarily comprehensible to the general public, making any semantic anomalies more opaque. This line of work has the promise of creating encrypted texts which are less detectable than earlier steganographic efforts.}
}

@InProceedings{Hassan2006,
  author = {M. Hassan Shirali-Shahreza and Mohammad Shirali-Shahreza},
  title = {A New Approach to Persian/Arabic Text Steganography},
  booktitle = {Proceedings of the 5th IEEE/ACIS International Conference on Computer and Information Science},
  year = {2006},
  ISBN = {0-7695-2613-6},
  pages = {310--315},
  DOI = {10.1109/ICIS-COMSAR.2006.10},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA},
  abstract = {Conveying information secretly and establishing hidden relationship has been of interest since long past. Text documents have been widely used since very long time ago. Therefore, we have witnessed different method of hiding information in texts (text steganography) since past to the present. In this paper we introduce a new approach for steganography in Persian and Arabic texts. Considering the existence of too many points in Persian and Arabic phrases, in this approach, by vertical displacement of the points, we hide information in the texts. This approach can be categorized under feature coding methods. This method can be used for Persian/Arabic Watermarking. Our method has been implemented by JAVA programming language.}
}

@InProceedings{Macq2007,
  author = {B. Macq and O.
Vybornova},
  title = {A method of text watermarking using presuppositions},
  booktitle = {Proceedings of the SPIE International Conference on Security, Steganography, and Watermarking of Multimedia Contents},
  year = {2007},
  month = {January},
  location = {San Jose, CA}
}

This is awaiting approval:
Article{Liu2007,
  author = {Tsung-Yuan Liu and Wen-Hsiang Tsai},
  institution = {National Chiao Tung University},
  title = {A New Steganographic Method for Data Hiding in Microsoft Word Documents by A Change Tracking Technique },
  journal = {IEEE Transactions on Information Forensics and Security},
  note = {to appear},
  year = {2007},
  abstract = {A new steganographic method for data hiding in Microsoft Word documents by a change tracking technique is proposed. The data embedding is disguised such that the stego-document appears to be the product of a collaborative writing effort. Text segments in the document are degenerated, mimicking to be the work of an author with inferior writing skill, with the secret message embedded in the choices of degenerations. The degenerations are then revised with the changes being tracked, making it appear as if a cautious author is correcting the mistakes. The change tracking information contained in the stego-document allows the original cover, the degenerated document, and hence the secret message, to be recovered. The extra change tracking information added during message embedding is vital in a normal collaboration scenario, and so hinders ignorant removals by skeptics. Experiments demonstrate the feasibility of the proposed method.}
}

CHAPTER RELATED BIBLIOGRAPHY

@InProceedings{Atallah2000related,
  author = {Mikhail J. Atallah and Craig J.
McDonough and Victor Raskin and Sergei Nirenburg},
  institution = {Purdue CERIAS},
  title = {Natural language processing for information assurance and security: an overview and implementations},
  booktitle = {NSPW '00: Proceedings of the 2000 workshop on New security paradigms},
  editor = {Mary Ellen Zurko and Steven J. Greenwald},
  location = {Ballycotton, County Cork, Ireland},
  month = {September},
  year = {2000},
  publisher = {ACM Press},
  ISBN = {1-58113-260-3},
  pages = {51--65},
  DOI = {10.1145/366173.366190},
  abstract = {The paper introduces and advocates an ontological semantic approach to information security. Both the approach and its resources, the ontology and lexicons, are borrowed from the field of natural language processing and adjusted to the needs of the new domain. The approach pursues the ultimate dual goals of inclusion of natural language data sources as an integral part of the overall data sources in information security applications, and formal specification of the information security community know-how for the support of routine and time-efficient measures to prevent and counteract computer attacks. As the first order of the day, the approach is seen by the information security community as a powerful means to organize and unify the terminology and nomenclature of the field.}
}

@InProceedings{Raskin2001related,
  author = {Victor Raskin and Christian F. Hempelmann and Katrina E. Triezenberg and Sergei Nirenburg},
  institution = {Purdue CERIAS},
  title = {Ontology in information security: a useful theoretical foundation and methodological tool},
  booktitle = {NSPW '01: Proceedings of the 2001 workshop on New security paradigms},
  editor = {Victor Raskin and Steven J.
Greenwald},
  location = {Cloudcroft, New Mexico},
  month = {September},
  year = {2001},
  publisher = {ACM Press},
  ISBN = {1-58113-457-6},
  pages = {53--59},
  DOI = {10.1145/508171.508180},
  URL = {http://omni.cc.purdue.edu/~vraskin/NSPW-2001.pdf},
  abstract = {The paper introduces and advocates an ontological semantic approach to information security. Both the approach and its resources, the ontology and lexicons, are borrowed from the field of natural language processing and adjusted to the needs of the new domain. The approach pursues the ultimate dual goals of inclusion of natural language data sources as an integral part of the overall data sources in information security applications, and formal specification of the information security community know-how for the support of routine and time-efficient measures to prevent and counteract computer attacks. As the first order of the day, the approach is seen by the information security community as a powerful means to organize and unify the terminology and nomenclature of the field.}
}

@Book{Wayner2002related,
  author = {Peter Wayner},
  title = {Disappearing Cryptography -- Information Hiding: Steganography \& Watermarking},
  publisher = {Morgan Kaufmann Publishers},
  address = {Los Altos, CA 94022, USA},
  edition = {Second},
  pages = {xvii + 413},
  year = {2002},
  ISBN = {1-55860-769-2},
  price = {USD 44.95},
  abstract = {Disappearing Cryptography, Second Edition describes how to take words, sounds, or images and hide them in digital data so that they look like other words, sounds, or images. When used properly, this powerful technique makes it almost impossible to trace the author or the recipient of a message. Conversations can be submerged in the flow of information through the Internet so that no one can know if a conversation exists at all. This full revision of the best-selling first edition describes a number of different techniques to hide information.
These techniques include encryption (making data incomprehensible), steganography (embedding information into video, audio, or graphic files), watermarking (hiding data in the noise of image or sound files), mimicry ("dressing up" data and making it appear to be other data), and others. This second edition also includes an expanded discussion on hiding information with spread-spectrum algorithms, shuffling tricks, and synthetic worlds. Each chapter is divided into sections, first providing an introduction and high-level summary for those who want to understand the concepts without wading through technical explanations, and then presenting greater detail for those who want to write their own programs.},
  note = {Chapters 6 and 7 serve as good introductions to mimic functions}
}

@InProceedings{Bolshakov2004related,
  author = {Igor A. Bolshakov and Alexander Gelbukh},
  institution = {Center for Computing Research, National Polytechnic Institute, Mexico City, Mexico and Department of Computer Science and Engineering, Chung-Ang University, Seoul, Korea},
  title = {Synonymous Paraphrasing Using WordNet and Internet},
  booktitle = {Natural Language Processing and Information Systems: 9th International Conference on Applications of Natural Language to Information Systems, NLDB 2004},
  editor = {Farid Meziane and Elisabeth Metais},
  location = {Salford, UK},
  month = {June},
  year = {2004},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  volume = {3136},
  ISBN = {3-540-22564-1},
  pages = {312--323},
  abstract = {We propose a method of synonymous paraphrasing of a text based on WordNet synonymy data and Internet statistics of stable word combinations (collocations). Given a text, we look for words or expressions in it for which WordNet provides synonyms, and substitute them with such synonyms only if the latter form valid collocations with the surrounding words according to the statistics gathered from Internet.
We present two important applications of such synonymous paraphrasing: (1) style-checking and correction: automatic evaluation and computer-aided improvement of writing style with regard to various aspects (increasing vs. decreasing synonymous variation, conformistic vs. individualistic selection of synonyms, etc.) and (2) steganography: hiding of additional information in the text by special selection of synonyms. A basic interactive algorithm of style improvement is outlined and an example of its application to editing of newswire text fragment in English is traced. Algorithms of style evaluation and information hiding are also proposed.}
}

@InProceedings{Bergmair2004related,
  author = {Richard Bergmair and Stefan Katzenbeisser},
  title = {Towards Human Interactive Proofs in the Text-Domain},
  booktitle = {Proceedings of the 7th Information Security Conference},
  pages = {257--267},
  year = {2004},
  editor = {Kan Zhang and Yuliang Zheng},
  volume = {3225},
  series = {Lecture Notes in Computer Science},
  month = {September},
  publisher = {Springer Verlag},
  location = {Palo Alto, CA},
  URL = {http://bergmair.cjb.net/pub/towhiptext-proc.ps.gz},
  abstract = {We outline the linguistic problem of word-sense ambiguity and demonstrate its relevance to current computer security applications in the context of Human Interactive Proofs (HIPs). Such proofs enable a machine to automatically determine whether it is interacting with another machine or a human. HIPs were recently proposed to fight abuse of web services, denial-of-service attacks and spam.
We describe the construction of an HIP that relies solely on natural language and draws its security from the problem of word-sense ambiguity, i.e., the linguistic phenomenon that a word can have different meanings dependent on the context it is used in.}
}

@TechReport{Bergmair2005related,
  author = {Richard Bergmair and Stefan Katzenbeisser},
  title = {Content-Aware Steganography: About Lazy Prisoners and Narrow-Minded Wardens},
  year = 2005,
  month = dec,
  URL = {http://richard.bergmair.eu/pub/hipstego-doc.pdf},
  institution = {Technische Universit\"at M\"unchen, Institut f\"ur Informatik AI/Cognition Group},
  ISSN = {0941-6358},
  abstract = {We introduce content-aware steganography as a new paradigm of steganography stemming from a shift in perspectives towards the objects of steganography. In particular, we abandon the point of view that steganographic objects can be considered pieces of data, suggesting that they should rather be considered pieces of information. We provide some evidence to suggest that this shift in perspectives is in fact necessary, and pinpoint a semantic problem that has not received sufficient attention in the past. We also propose a solution to this problem, by putting forward a new kind of steganography that employs human interactive proofs as a security primitive.},
  number = {fki-252-05}
}

@InProceedings{Bergmair2006related,
  author = {Richard Bergmair and Stefan Katzenbeisser},
  title = {Content-Aware Steganography: About Lazy Prisoners and Narrow-Minded Wardens},
  booktitle = {Proceedings of the 8th Information Hiding Workshop},
  year = {2006},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  location = {Alexandria, VA},
  abstract = {We introduce content-aware steganography as a new paradigm of steganography stemming from a shift in perspectives towards the objects of steganography.
In particular, we abandon the point of view that steganographic objects can be considered pieces of data, suggesting that they should rather be considered pieces of information. We provide some evidence to suggest that this shift in perspectives is in fact necessary, and pinpoint a semantic problem that has not received sufficient attention in the past. We also propose a solution to this problem, by putting forward a new kind of steganography that employs human interactive proofs as a security primitive.},
  note = {in print}
}

CHAPTER IMPLEMENTED SYSTEMS

@Misc{Walker1994impl,
  author = {John Walker},
  title = {Steganosaurus},
  month = {December},
  year = {1994},
  howpublished = {circulating on the web},
  URL = {http://www.fourmilab.ch/stego/stego.shar.gz},
  note = {\url{http://www.fourmilab.ch/stego/stego.shar.gz}, accessed 2005-03-25},
  abstract = {Steganosaurus is a plain text steganography (secret writing) utility which encodes a (usually encrypted) binary file as gibberish text, based on either a spelling dictionary or words taken from a text document. In portable C; public domain.}
}

@Misc{Maher1995impl,
  author = {Kevin Maher},
  title = {Texto},
  month = {February},
  year = {1995},
  howpublished = {circulating on the web},
  URL = {http://www.ecn.org/crypto/soft/texto.zip},
  note = {\url{http://www.ecn.org/crypto/soft/texto.zip}, accessed 2005-03-22},
  abstract = {Texto is a rudimentary text steganography program which transforms uuencoded or pgp ascii-armoured ascii data into English sentences. This program was written to facilitate the exchange of binary data, especially encrypted data. Why is this necessary? People or programs may be reading your mail. Recent events in the US congress may \_require\_ Internet Service Providers to monitor incoming mail and determine whether or not it is "obscene" or lives up to particular parochial moral standards.
Since they can't scan the contents of an encrypted message, and probably don't have time to manually look at each uuencoded message, such emails will probably go into the bit bucket. This program's output is hopefully close enough to normal English text that it will slip by any kind of automated scanning.}
}

@Misc{Chapman1997impl,
  author = {Mark T. Chapman and George I. Davida},
  title = {NICETEXT},
  month = {August},
  year = {1997},
  howpublished = {Website},
  URL = {http://www.nicetext.com/},
  note = {\url{http://www.nicetext.com/}, accessed 2005-03-09},
  abstract = {NICETEXT is a package that converts any file into pseudo-natural-language text. It also has the ability to recover the original file from the text! The expandable set of tools allows experimentation with custom dictionaries, automatic simulation of writing style, and the use of Context-Free-Grammars to control text generation.}
}

@Misc{Wayner1997impl,
  author = {Peter Wayner},
  title = {Mimicry Applet},
  month = {August},
  year = {1997},
  howpublished = {Website},
  URL = {http://www.wayner.org/texts/mimic/},
  note = {\url{http://www.wayner.org/texts/mimic/}, accessed 2004-04-12},
  abstract = {This applet shows how data can be mutated into innocent sounding plaintext with the push of a button. In this case, the destination is a the voiceover from a hypothetical baseball game between two teams named the Blogs and the Whappers. The information is encoded by choosing the words, the players and the action in the game.
In some cases, one message will lead to a string of homeruns and in other cases a different message will strike out three players in a row.}
}

@Misc{Winstein1999impl,
  author = {Keith Winstein},
  title = {Tyrannosaurus Lex},
  month = {January},
  year = {1999},
  howpublished = {Website},
  URL = {http://alumni.imsa.edu/~keithw/tlex/},
  note = {\url{http://alumni.imsa.edu/~keithw/tlex/}, accessed 2005-03-09},
  abstract = {Steganography is a field concerned with hiding information, typically within some unsuspicious "carrier". For instance, an online news site might use steganographic watermarking to encode their images with some copyright notice, allowing them to easily search for copies of the same images on another web site by searching for images containing the watermark. Schemes for hiding data in blocks of text exist, but are usually dependent on being able to modify the physical appearance of the text - usually by subtly moving lines up and down, etc. Lexical steganography is the encoding of data in blocks of text on the lexical, or word, level.}
}

@Misc{Hugg1999impl,
  author = {Steven E. Hugg},
  title = {Stegparty},
  month = {November},
  year = {1999},
  howpublished = {Website},
  URL = {http://www.fasterlight.com/hugg/projects/stegparty.html},
  note = {\url{http://www.fasterlight.com/hugg/projects/stegparty.html}, accessed 2005-03-25},
  abstract = {StegParty is a system for hiding information inside of plain-text files. Unlike similar tools currently available it does not use random gibberish to encode data -- it relies on small alterations to the message, like changes to spelling and punctuation.
Because of this you can use any plain-text file as your carrier , and it will be more-or-less understandable after the secret message is embedded.}
}

@Misc{McKellar2000impl,
  author = {David McKellar},
  title = {Spammimic},
  month = {June},
  year = {2000},
  howpublished = {Website},
  URL = {http://www.spammimic.com/},
  note = {\url{http://www.spammimic.com/}, accessed 2004-04-12},
  abstract = {There is tons of spam flying around the Internet. Most people can't delete it fast enough. It's virtually invisible. This site gives you access to a program that will encrypt a short message into spam. Basically, the sentences it outputs vary depending on the message you are encoding. Real spam is so stupidly written it's sometimes hard to tell the machine written spam from the genuine article.}
}

@Misc{Shields2001impl,
  author = {Paul Shields},
  title = {Stegano},
  month = {November},
  year = {2001},
  howpublished = {circulating on the web},
  URL = {http://zooid.org/~paul/crypto/natlang/stegano-1.02.tar.gz},
  note = {\url{http://zooid.org/~paul/crypto/natlang/stegano-1.02.tar.gz}, accessed 2005-03-25},
  abstract = {This is a small set of heuristic tools intended for use in steganographic writings.
How you use these tools is up to you.}
}

@Misc{Zalewski2002impl,
  author = {Michal Zalewski},
  title = {snowdrop},
  month = {September},
  year = {2002},
  howpublished = {freshmeat entry},
  URL = {http://freshmeat.net/projects/snowdrop/},
  note = {\url{http://freshmeat.net/projects/snowdrop/}, accessed 2005-03-20},
  abstract = {snowdrop is a steganographic text document and C code watermarking tool that uses redundant, tamper-evident and modification-proof information embedded in the content itself, instead of the medium, to simplify tracking of proprietary code leaks, sensitive information disclosure, etc.}
}

@Misc{ComprisXXXXimpl,
  CorpAuthor = {Compris Intelligence GmbH},
  key = {Compris Intelligence GmbH},
  institution = {Compris Intelligence GmbH},
  title = {TextHide},
  howpublished = {Website},
  URL = {http://www.texthide.com/},
  note = {\url{http://www.texthide.com/}, accessed 2005-03-20},
  abstract = {Compris Intelligence GmbH has developed a program which can automatically reformulate text. Nevertheless, the meaning of the text is retained completely. This feature can also be used to hide data in normal text. The technical term for this is steganography.}
}

@Misc{ComprisXXXXimpla,
  CorpAuthor = {Compris Intelligence GmbH},
  key = {Compris Intelligence GmbH},
  institution = {Compris Intelligence GmbH},
  title = {TextSign},
  howpublished = {Website},
  URL = {http://www.textmark.com/},
  note = {\url{http://www.textmark.com/}, accessed 2005-03-20},
  abstract = {Scanning, speech recognition, internet downloading, intelligent text processing systems: Text processing becomes increasingly simple, writing good texts remains difficult. With TextMark you protect your intellectual property. This innovation distinguishes itself by its broad applicability to all kinds of textual documents and its tamper proof characteristic.}
}