Twelve Ways to Style The Twelve Days

By: Kevin Jones
Senior Consultant, Embedded Systems

6th December 2018

Home » c code

At this time of year I sometimes think about a now-famous piece of obfuscated C code. This was the winning entry in the 1988 International Obfuscated C Code Contest [see reference 1] and, believe it or not, will print the entire lyrics to “The Twelve Days of Christmas”:

#include <stdio.h>
main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/")
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry"),a+1);
}

There are plenty of articles online that explain how this works; just use your favourite search engine to find out more [see reference 2]. Rather than repeat those descriptions, I’ll stick to the festive theme that prompted this blog and illustrate twelve ways to improve the readability of this wonderful algorithm.

1) Extract the Constant Strings

There are two strings lurking in the source code. The longer string contains an encoded version of the song lyrics and the shorter string contains a substitution cipher to decode the lyrics. The source code is immediately much clearer if these strings are declared near the beginning of the file. In the following example, “static” limits the scope of the strings to one source file and “const” declares them as read-only. I like to use uppercase symbols for constants but that’s nothing more than personal choice. Here’s the modified source code:

#include <stdio.h>

static const char *CIPHER =
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/";

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

2) Reformat the Strings

The two strings are recursively indexed to decode and display the lyrics. Reformatting these strings helps to understand the algorithm. In the next step the lyrics and the cipher are declared using C’s alternative string concatenation syntax. The substitution cipher is split after the 31st character, the lyrics are split at each “/” character and the original code’s “\” concatenation in the lyrics string has been eliminated. There’s also some indentation to improve readability:

#include <stdio.h>

static const char *CIPHER =
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"
    "*{}w+/"
    "w#cdnr/"
    "+,{}r/"
    "*de}+,/"
    "*{*+,/"
    "w{%+,/"
    "w#q#n+,/"
    "#{l+,/"
    "n{n+,/"
    "+#n+,/"
    "#;#q#n+,/"
    "+k#;*+,/"
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"
    "#;#q#n'){)#}w'){){nl]'/"
    "+#n';d}rw' i;# ){nl]!/"
    "n{n#'; r{#w'r nc{nl]'/"
    "#{l,+'K {rw' iK{;[{nl]'/"
    "w#q#n'wk nw' iwk{KK{nl]!/"
    "w{%'l##w#' i; :{nl]'/"
    "*{q#'ld;r'}{nlwb!/"
    "*de}'c ;;{nl'-{}rw]'/"
    "+,}##'*}#nc,',#nw]'/"
    "+kd'+e}+;#'rdq#w! nr'/"
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

3) Add Some Comments

All good programmers know that comments are useful. Annotating the constant strings immediately helps understand the algorithm even before any changes are made to the function. I prefer block comments for maximum portability but end-of-line comments would work just as well:

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"  /* On the */
    "*{}w+/"    /* first */
    "w#cdnr/"   /* second */
    "+,{}r/"    /* third */
    "*de}+,/"   /* fourth */
    "*{*+,/"    /* fifth */
    "w{%+,/"    /* sixth */
    "w#q#n+,/"  /* seventh */
    "#{l+,/"    /* eighth */
    "n{n+,/"    /* ninth */
    "+#n+,/"    /* tenth */
    "#;#q#n+,/" /* eleventh */
    "+k#;*+,/"  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"

    /* twelve drummers drumming, */
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"

    /* eleven pipers piping, */
    "#;#q#n'){)#}w'){){nl]'/"

    /* ten lords a-leaping,\n */
    "+#n';d}rw' i;# ){nl]!/"

    /* nine ladies dancing, */
    "n{n#'; r{#w'r nc{nl]'/"

    /* eight maids a-milking, */
    "#{l,+'K {rw' iK{;[{nl]'/"

    /* seven swans a-swimming,\n */
    "w#q#n'wk nw' iwk{KK{nl]!/"

    /* six geese a-laying, */
    "w{%'l##w#' i; :{nl]'/"

    /* five gold rings;\n */
    "*{q#'ld;r'}{nlwb!/"

    /* four calling birds, */
    "*de}'c ;;{nl'-{}rw]'/"

    /* three french hens, */
    "+,}##'*}#nc,',#nw]'/"

    /* two turtle doves\nand */
    "+kd'+e}+;#'rdq#w! nr'/"

    /* a partridge in a pear tree */
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

4) Indent the Function Contents

This step is straight forward… simply indent the contents of the single function:

main(t,_,a)
char *a;
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

5) Eliminate Implicit Types

If you’re compiling the examples then you may have noticed that there are plenty of implicit type warnings. The main function uses an old C syntax to declare parameter “a” as pointer-to-char. The remaining parameters are implicit integers and the return type of the main function is also an implicit integer. These are fixed in the next sample while taking the opportunity to include the “const” type qualifier:

int main(int t, int _, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

6) Rename the Underscore Parameter

The second parameter of the main function is a single underscore. While this is perfectly legal in C, it’s a lot harder to read than an alphabetic character. The next sample has replaced all instances of the underscore parameter with the single character “b”:

int main(int t, int b, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a)):
    1,t<b?main(t+1,b,a):3,main(-94,-27+t,a)&&t==2?b<13?
    main(2,b+1,"%s %d %d\n"):9:16:t<0?t<-72?main(b,t,LYRICS)
    :t<-50?b==*a?putchar(31[a]):main(-65,b,a+1):main((*a=='/')+t,b,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

7) Reformat the Ternary Conditional Operator

The obfuscated implementation makes extensive use of the ternary conditional operator. The next sample reformats these ternary operators with some additional whitespace:

int main(int t, int b, const char *a)
{
    return
    !0<t
    ?
        t<3
        ?
            main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a))
        :
            1, t<b
            ?
                main(t+1,b,a)
            :
                3, main(-94,-27+t,a)&&t==2
                ?
                    b<13
                    ?
                        main(2,b+1,"%s %d %d\n")
                    :
                        9
                :
                    16
    :
        t<0
        ?
            t<-72
            ?
                main(b,t,LYRICS)
            :
                t<-50
                ?
                    b==*a
                    ?
                        putchar(31[a])
                    :
                        main(-65,b,a+1)
                :
                    main((*a=='/')+t,b,a+1)
        :
            0<t
            ?
                main(2,2,"%s")
            :
                *a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

8) Replace the Ternary Conditional Statements with If-Else

A simple replacement of the ternary conditional statements with if-else statements improves the presentation of the algorithm. In the following sample, there is yet more white space and the multiple return points have been rationalised into a single point of return by using a temporary integer variable “r”:

int main(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            main(-79, -13, a + main(-87, 1 - b, main(-86, 0, a + 1) + a));
        }
        else
        {
            1;
        }
        if (t < b)
        {
            main(t + 1, b, a);
        }
        else
        {
            3;
        }
        if (main(-94, -27 + t, a) && t == 2)
        {
            if (b < 13)
            {
                r = main(2, b + 1, "%s %d %d\n");
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = main(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(31[a]);
                    }
                    else
                    {
                        r = main(-65, b, a + 1);
                    }
                }
                else
                {
                    r = main((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = main(2, 2, "%s");
            }
            else
            {
                r = *a == '/' || main(0, main(-61, *a, CIPHER), a + 1);
            }
        }
    }
    return r;
}

9) Simplify an Array Access

This change is straight forward. The array access “31[a]” is exactly the same as “a[31]” yet the latter is much easier to understand in the context of the 31 character substitution cipher:

    ...
    r = putchar(a[31]);
    ...

10) Add Brackets to Logical Expressions

Personally, I prefer plenty of brackets in compound logical expressions. The following snips are small improvements:

    ...
    if (main(-94, -27 + t, a) && (t == 2))
    ...

    ...
    r = ((*a == '/') || (main(0, main(-61, *a, CIPHER), a + 1)));
    ...

11) Remove Misdirection

There are several instances where the implementation is deliberately misleading. Given the reformatted source code so far, it’s easy to see that there are two “else” clauses with no side effects (“else {1;}” and “else {3;}”). Less obvious is that the parameters that look like string formatting specifiers passed to the main function are unused in that iteration of the function and can be replaced with zero:

    ...
    if (b < 13)
    {
        r = main(2, b + 1, 0);
    }
    ...

    ...
    r = main(2, 2, 0);
    ...

12) Implement a Stand-Alone Recursive Function

The entire algorithm so far uses the single “main” function in an unusual recursive manner. After the last of the twelve changes, the main function looks like a standard C main function and there’s a separate local-scope (static) recursive function which, in time-honoured programmer’s tradition, is called “foo”. This gives the final complete sample after all twelve changes:

#include <stdio.h>

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"  /* On the */
    "*{}w+/"    /* first */
    "w#cdnr/"   /* second */
    "+,{}r/"    /* third */
    "*de}+,/"   /* fourth */
    "*{*+,/"    /* fifth */
    "w{%+,/"    /* sixth */
    "w#q#n+,/"  /* seventh */
    "#{l+,/"    /* eighth */
    "n{n+,/"    /* ninth */
    "+#n+,/"    /* tenth */
    "#;#q#n+,/" /* eleventh */
    "+k#;*+,/"  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"

    /* twelve drummers drumming, */
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"

    /* eleven pipers piping, */
    "#;#q#n'){)#}w'){){nl]'/"

    /* ten lords a-leaping,\n */
    "+#n';d}rw' i;# ){nl]!/"

    /* nine ladies dancing, */
    "n{n#'; r{#w'r nc{nl]'/"

    /* eight maids a-milking, */
    "#{l,+'K {rw' iK{;[{nl]'/"

    /* seven swans a-swimming,\n */
    "w#q#n'wk nw' iwk{KK{nl]!/"

    /* six geese a-laying, */
    "w{%'l##w#' i; :{nl]'/"

    /* five gold rings;\n */
    "*{q#'ld;r'}{nlwb!/"

    /* four calling birds, */
    "*de}'c ;;{nl'-{}rw]'/"

    /* three french hens, */
    "+,}##'*}#nc,',#nw]'/"

    /* two turtle doves\nand */
    "+kd'+e}+;#'rdq#w! nr'/"

    /* a partridge in a pear tree */
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

static int foo(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            foo(-79, -13, a + foo(-87, 1 - b, foo(-86, 0, a + 1) + a));
        }
        if (t < b)
        {
            foo(t + 1, b, a);
        }
        if (foo(-94, -27 + t, a) && (t == 2))
        {
            if (b < 13)
            {
                r = foo(2, b + 1, 0);
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = foo(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(a[31]);
                    }
                    else
                    {
                        r = foo(-65, b, a + 1);
                    }
                }
                else
                {
                    r = foo((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = foo(2, 2, 0);
            }
            else
            {
                r = ((*a == '/') || (foo(0, foo(-61, *a, CIPHER), a + 1)));
            }
        }
    }
    return r;
}

int main(int argc, char *argv[])
{
    return foo(1, 0, 0);
}

There are plenty of changes that could still be made to improve readability, such as more comments and replacing the recursive algorithm with iterative loops… but I’ve made my promised twelve improvements, leaving you with plenty of opportunity to make some of your own.



[1] At the time of writing this blog, the original code can be found on the International Obfuscated C Code Contest website on the “Winning Entries” web page, listed as “phillipps.c” under the 1988 sub-heading.

[2] Credit goes to an analysis by Mike Markowski and to a similar exercise in reformatting the source code by Michael Nahas.

At this time of year I sometimes think about a now-famous piece of obfuscated C code. This was the winning entry in the 1988 International Obfuscated C Code Contest [see reference 1] and, believe it or not, will print the entire lyrics to “The Twelve Days of Christmas”:

#include <stdio.h>
main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/")
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry"),a+1);
}

There are plenty of articles online that explain how this works; just use your favourite search engine to find out more [see reference 2]. Rather than repeat those descriptions, I’ll stick to the festive theme that prompted this blog and illustrate twelve ways to improve the readability of this wonderful algorithm.

1) Extract the Constant Strings

There are two strings lurking in the source code. The longer string contains an encoded version of the song lyrics and the shorter string contains a substitution cipher to decode the lyrics. The source code is immediately much clearer if these strings are declared near the beginning of the file. In the following example, “static” limits the scope of the strings to one source file and “const” declares them as read-only. I like to use uppercase symbols for constants but that’s nothing more than personal choice. Here’s the modified source code:

#include <stdio.h>

static const char *CIPHER =
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/";

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

2) Reformat the Strings

The two strings are recursively indexed to decode and display the lyrics. Reformatting these strings helps to understand the algorithm. In the next step the lyrics and the cipher are declared using C’s alternative string concatenation syntax. The substitution cipher is split after the 31st character, the lyrics are split at each “/” character and the original code’s “\” concatenation in the lyrics string has been eliminated. There’s also some indentation to improve readability:

#include <stdio.h>

static const char *CIPHER =
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"
    "*{}w+/"
    "w#cdnr/"
    "+,{}r/"
    "*de}+,/"
    "*{*+,/"
    "w{%+,/"
    "w#q#n+,/"
    "#{l+,/"
    "n{n+,/"
    "+#n+,/"
    "#;#q#n+,/"
    "+k#;*+,/"
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"
    "#;#q#n'){)#}w'){){nl]'/"
    "+#n';d}rw' i;# ){nl]!/"
    "n{n#'; r{#w'r nc{nl]'/"
    "#{l,+'K {rw' iK{;[{nl]'/"
    "w#q#n'wk nw' iwk{KK{nl]!/"
    "w{%'l##w#' i; :{nl]'/"
    "*{q#'ld;r'}{nlwb!/"
    "*de}'c ;;{nl'-{}rw]'/"
    "+,}##'*}#nc,',#nw]'/"
    "+kd'+e}+;#'rdq#w! nr'/"
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

3) Add Some Comments

All good programmers know that comments are useful. Annotating the constant strings immediately helps understand the algorithm even before any changes are made to the function. I prefer block comments for maximum portability but end-of-line comments would work just as well:

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"  /* On the */
    "*{}w+/"    /* first */
    "w#cdnr/"   /* second */
    "+,{}r/"    /* third */
    "*de}+,/"   /* fourth */
    "*{*+,/"    /* fifth */
    "w{%+,/"    /* sixth */
    "w#q#n+,/"  /* seventh */
    "#{l+,/"    /* eighth */
    "n{n+,/"    /* ninth */
    "+#n+,/"    /* tenth */
    "#;#q#n+,/" /* eleventh */
    "+k#;*+,/"  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"

    /* twelve drummers drumming, */
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"

    /* eleven pipers piping, */
    "#;#q#n'){)#}w'){){nl]'/"

    /* ten lords a-leaping,\n */
    "+#n';d}rw' i;# ){nl]!/"

    /* nine ladies dancing, */
    "n{n#'; r{#w'r nc{nl]'/"

    /* eight maids a-milking, */
    "#{l,+'K {rw' iK{;[{nl]'/"

    /* seven swans a-swimming,\n */
    "w#q#n'wk nw' iwk{KK{nl]!/"

    /* six geese a-laying, */
    "w{%'l##w#' i; :{nl]'/"

    /* five gold rings;\n */
    "*{q#'ld;r'}{nlwb!/"

    /* four calling birds, */
    "*de}'c ;;{nl'-{}rw]'/"

    /* three french hens, */
    "+,}##'*}#nc,',#nw]'/"

    /* two turtle doves\nand */
    "+kd'+e}+;#'rdq#w! nr'/"

    /* a partridge in a pear tree */
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

4) Indent the Function Contents

This step is straight forward… simply indent the contents of the single function:

main(t,_,a)
char *a;
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

5) Eliminate Implicit Types

If you’re compiling the examples then you may have noticed that there are plenty of implicit type warnings. The main function uses an old C syntax to declare parameter “a” as pointer-to-char. The remaining parameters are implicit integers and the return type of the main function is also an implicit integer. These are fixed in the next sample while taking the opportunity to include the “const” type qualifier:

int main(int t, int _, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

6) Rename the Underscore Parameter

The second parameter of the main function is a single underscore. While this is perfectly legal in C, it’s a lot harder to read than an alphabetic character. The next sample has replaced all instances of the underscore parameter with the single character “b”:

int main(int t, int b, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a)):
    1,t<b?main(t+1,b,a):3,main(-94,-27+t,a)&&t==2?b<13?
    main(2,b+1,"%s %d %d\n"):9:16:t<0?t<-72?main(b,t,LYRICS)
    :t<-50?b==*a?putchar(31[a]):main(-65,b,a+1):main((*a=='/')+t,b,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

7) Reformat the Ternary Conditional Operator

The obfuscated implementation makes extensive use of the ternary conditional operator. The next sample reformats these ternary operators with some additional whitespace:

int main(int t, int b, const char *a)
{
    return
    !0<t
    ?
        t<3
        ?
            main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a))
        :
            1, t<b
            ?
                main(t+1,b,a)
            :
                3, main(-94,-27+t,a)&&t==2
                ?
                    b<13
                    ?
                        main(2,b+1,"%s %d %d\n")
                    :
                        9
                :
                    16
    :
        t<0
        ?
            t<-72
            ?
                main(b,t,LYRICS)
            :
                t<-50
                ?
                    b==*a
                    ?
                        putchar(31[a])
                    :
                        main(-65,b,a+1)
                :
                    main((*a=='/')+t,b,a+1)
        :
            0<t
            ?
                main(2,2,"%s")
            :
                *a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

8) Replace the Ternary Conditional Statements with If-Else

A simple replacement of the ternary conditional statements with if-else statements improves the presentation of the algorithm. In the following sample, there is yet more white space and the multiple return points have been rationalised into a single point of return by using a temporary integer variable “r”:

int main(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            main(-79, -13, a + main(-87, 1 - b, main(-86, 0, a + 1) + a));
        }
        else
        {
            1;
        }
        if (t < b)
        {
            main(t + 1, b, a);
        }
        else
        {
            3;
        }
        if (main(-94, -27 + t, a) && t == 2)
        {
            if (b < 13)
            {
                r = main(2, b + 1, "%s %d %d\n");
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = main(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(31[a]);
                    }
                    else
                    {
                        r = main(-65, b, a + 1);
                    }
                }
                else
                {
                    r = main((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = main(2, 2, "%s");
            }
            else
            {
                r = *a == '/' || main(0, main(-61, *a, CIPHER), a + 1);
            }
        }
    }
    return r;
}

9) Simplify an Array Access

This change is straight forward. The array access “31[a]” is exactly the same as “a[31]” yet the latter is much easier to understand in the context of the 31 character substitution cipher:

    ...
    r = putchar(a[31]);
    ...

10) Add Brackets to Logical Expressions

Personally, I prefer plenty of brackets in compound logical expressions. The following snips are small improvements:

    ...
    if (main(-94, -27 + t, a) && (t == 2))
    ...

    ...
    r = ((*a == '/') || (main(0, main(-61, *a, CIPHER), a + 1)));
    ...

11) Remove Misdirection

There are several instances where the implementation is deliberately misleading. Given the reformatted source code so far, it’s easy to see that there are two “else” clauses with no side effects (“else {1;}” and “else {3;}”). Less obvious is that the parameters that look like string formatting specifiers passed to the main function are unused in that iteration of the function and can be replaced with zero:

    ...
    if (b < 13)
    {
        r = main(2, b + 1, 0);
    }
    ...

    ...
    r = main(2, 2, 0);
    ...

12) Implement a Stand-Alone Recursive Function

The entire algorithm so far uses the single “main” function in an unusual recursive manner. After the last of the twelve changes, the main function looks like a standard C main function and there’s a separate local-scope (static) recursive function which, in time-honoured programmer’s tradition, is called “foo”. This gives the final complete sample after all twelve changes:

#include <stdio.h>

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"  /* On the */
    "*{}w+/"    /* first */
    "w#cdnr/"   /* second */
    "+,{}r/"    /* third */
    "*de}+,/"   /* fourth */
    "*{*+,/"    /* fifth */
    "w{%+,/"    /* sixth */
    "w#q#n+,/"  /* seventh */
    "#{l+,/"    /* eighth */
    "n{n+,/"    /* ninth */
    "+#n+,/"    /* tenth */
    "#;#q#n+,/" /* eleventh */
    "+k#;*+,/"  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"

    /* twelve drummers drumming, */
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"

    /* eleven pipers piping, */
    "#;#q#n'){)#}w'){){nl]'/"

    /* ten lords a-leaping,\n */
    "+#n';d}rw' i;# ){nl]!/"

    /* nine ladies dancing, */
    "n{n#'; r{#w'r nc{nl]'/"

    /* eight maids a-milking, */
    "#{l,+'K {rw' iK{;[{nl]'/"

    /* seven swans a-swimming,\n */
    "w#q#n'wk nw' iwk{KK{nl]!/"

    /* six geese a-laying, */
    "w{%'l##w#' i; :{nl]'/"

    /* five gold rings;\n */
    "*{q#'ld;r'}{nlwb!/"

    /* four calling birds, */
    "*de}'c ;;{nl'-{}rw]'/"

    /* three french hens, */
    "+,}##'*}#nc,',#nw]'/"

    /* two turtle doves\nand */
    "+kd'+e}+;#'rdq#w! nr'/"

    /* a partridge in a pear tree */
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

static int foo(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            foo(-79, -13, a + foo(-87, 1 - b, foo(-86, 0, a + 1) + a));
        }
        if (t < b)
        {
            foo(t + 1, b, a);
        }
        if (foo(-94, -27 + t, a) && (t == 2))
        {
            if (b < 13)
            {
                r = foo(2, b + 1, 0);
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = foo(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(a[31]);
                    }
                    else
                    {
                        r = foo(-65, b, a + 1);
                    }
                }
                else
                {
                    r = foo((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = foo(2, 2, 0);
            }
            else
            {
                r = ((*a == '/') || (foo(0, foo(-61, *a, CIPHER), a + 1)));
            }
        }
    }
    return r;
}

int main(int argc, char *argv[])
{
    return foo(1, 0, 0);
}

There are plenty of changes that could still be made to improve readability, such as more comments and replacing the recursive algorithm with iterative loops… but I’ve made my promised twelve improvements, leaving you with plenty of opportunity to make some of your own.

[1] At the time of writing this blog, the original code can be found on the International Obfuscated C Code Contest website on the “Winning Entries” web page, listed as “phillipps.c” under the 1988 sub-heading.

[2] Credit goes to an analysis by Mike Markowski and to a similar exercise in reformatting the source code by Michael Nahas.

Twelve Ways to Style The Twelve Days

Twelve Ways to Style The Twelve Days

Kevin Jones - Senior Consultant, Embedded Systems

By: Kevin Jones
Senior Consultant, Embedded Systems

10th January 2018

Home » c code

During the Christmas period, I was reminded of a now-famous piece of obfuscated C code. This was the winning entry in the 1988 International Obfuscated C Code Contest [see reference 1] and, believe it or not, will print the entire lyrics to “The Twelve Days of Christmas”:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save


#include <stdio.h>
main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/")
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry"),a+1);
}

There are plenty of articles online that explain how this works; just use your favourite search engine to find out more [see reference 2]. Rather than repeat those descriptions, I’ll stick to the festive theme that prompted this blog and illustrate twelve ways to improve the readability of this wonderful algorithm.

1) Extract the Constant Strings

There are two strings lurking in the source code. The longer string contains an encoded version of the song lyrics and the shorter string contains a substitution cipher to decode the lyrics. The source code is immediately much clearer if these strings are declared near the beginning of the file. In the following example, “static” limits the scope of the strings to one source file and “const” declares them as read-only. I like to use uppercase symbols for constants but that’s nothing more than personal choice. Here’s the modified source code:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include 

static const char *CIPHER =
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/";

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

2) Reformat the Strings

The two strings are recursively indexed to decode and display the lyrics. Reformatting these strings helps to understand the algorithm. In the next step the lyrics and the cipher are declared using C’s alternative string concatenation syntax. The substitution cipher is split after the 31st character, the lyrics are split at each “/” character and the original code’s “\” concatenation in the lyrics string has been eliminated. There’s also some indentation to improve readability:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include 

static const char *CIPHER =
     &quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:&quot;
    &quot;\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
    &quot;@n'+,#'/&quot;
    &quot;*{}w+/&quot;
    &quot;w#cdnr/&quot;
    &quot;+,{}r/&quot;
    &quot;*de}+,/&quot;
    &quot;*{*+,/&quot;
    &quot;w{%+,/&quot;
    &quot;w#q#n+,/&quot;
    &quot;#{l+,/&quot;
    &quot;n{n+,/&quot;
    &quot;+#n+,/&quot;
    &quot;#;#q#n+,/&quot;
    &quot;+k#;*+,/&quot;
    &quot;'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/&quot;
    &quot;+k#;q#'r}eKK#}w'r}eKK{nl]'/&quot;
    &quot;#;#q#n'){)#}w'){){nl]'/&quot;
    &quot;+#n';d}rw' i;# ){nl]!/&quot;
    &quot;n{n#'; r{#w'r nc{nl]'/&quot;
    &quot;#{l,+'K {rw' iK{;[{nl]'/&quot;
    &quot;w#q#n'wk nw' iwk{KK{nl]!/&quot;
    &quot;w{%'l##w#' i; :{nl]'/&quot;
    &quot;*{q#'ld;r'}{nlwb!/&quot;
    &quot;*de}'c ;;{nl'-{}rw]'/&quot;
    &quot;+,}##'*}#nc,',#nw]'/&quot;
    &quot;+kd'+e}+;#'rdq#w! nr'/&quot;
    &quot; ') }+}{rl#'{n' ')# }'+}##(!!/&quot;;

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

3) Add Some Comments

All good programmers know that comments are useful. Annotating the constant strings immediately helps understand the algorithm even before any changes are made to the function. I prefer block comments for maximum portability but end-of-line comments would work just as well:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     &quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:&quot;
    &quot;\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
    &quot;@n'+,#'/&quot;  /* On the */
    &quot;*{}w+/&quot;    /* first */
    &quot;w#cdnr/&quot;   /* second */
    &quot;+,{}r/&quot;    /* third */
    &quot;*de}+,/&quot;   /* fourth */
    &quot;*{*+,/&quot;    /* fifth */
    &quot;w{%+,/&quot;    /* sixth */
    &quot;w#q#n+,/&quot;  /* seventh */
    &quot;#{l+,/&quot;    /* eighth */
    &quot;n{n+,/&quot;    /* ninth */
    &quot;+#n+,/&quot;    /* tenth */
    &quot;#;#q#n+,/&quot; /* eleventh */
    &quot;+k#;*+,/&quot;  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    &quot;'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/&quot;

    /* twelve drummers drumming, */
    &quot;+k#;q#'r}eKK#}w'r}eKK{nl]'/&quot;

    /* eleven pipers piping, */
    &quot;#;#q#n'){)#}w'){){nl]'/&quot;

    /* ten lords a-leaping,\n */
    &quot;+#n';d}rw' i;# ){nl]!/&quot;

    /* nine ladies dancing, */
    &quot;n{n#'; r{#w'r nc{nl]'/&quot;

    /* eight maids a-milking, */
    &quot;#{l,+'K {rw' iK{;[{nl]'/&quot;

    /* seven swans a-swimming,\n */
    &quot;w#q#n'wk nw' iwk{KK{nl]!/&quot;

    /* six geese a-laying, */
    &quot;w{%'l##w#' i; :{nl]'/&quot;

    /* five gold rings;\n */
    &quot;*{q#'ld;r'}{nlwb!/&quot;

    /* four calling birds, */
    &quot;*de}'c ;;{nl'-{}rw]'/&quot;

    /* three french hens, */
    &quot;+,}##'*}#nc,',#nw]'/&quot;

    /* two turtle doves\nand */
    &quot;+kd'+e}+;#'rdq#w! nr'/&quot;

    /* a partridge in a pear tree */
    &quot; ') }+}{rl#'{n' ')# }'+}##(!!/&quot;;

4) Indent the Function Contents

This step is straight forward… simply indent the contents of the single function:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

main(t,_,a)
char *a;
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

5) Eliminate Implicit Types

If you’re compiling the examples then you may have noticed that there are plenty of implicit type warnings. The main function uses an old C syntax to declare parameter “a” as pointer-to-char. The remaining parameters are implicit integers and the return type of the main function is also an implicit integer. These are fixed in the next sample while taking the opportunity to include the “const” type qualifier:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int _, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

6) Rename the Underscore Parameter

The second parameter of the main function is a single underscore. While this is perfectly legal in C, it’s a lot harder to read than an alphabetic character. The next sample has replaced all instances of the underscore parameter with the single character “b”:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int b, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a)):
    1,t<b?main(t+1,b,a):3,main(-94,-27+t,a)&&t==2?b<13?
    main(2,b+1,"%s %d %d\n"):9:16:t<0?t<-72?main(b,t,LYRICS)
    :t<-50?b==*a?putchar(31[a]):main(-65,b,a+1):main((*a=='/')+t,b,a+1)
    :0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

7) Reformat the Ternary Conditional Operator

The obfuscated implementation makes extensive use of the ternary conditional operator. The next sample reformats these ternary operators with some additional whitespace:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int b, const char *a)
{
return
!0

<pre> 

8) Replace the Ternary Conditional Statements with If-Else

A simple replacement of the ternary conditional statements with if-else statements improves the presentation of the algorithm. In the following sample, there is yet more white space and the multiple return points have been rationalised into a single point of return by using a temporary integer variable “r”:

int main(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            main(-79, -13, a + main(-87, 1 - b, main(-86, 0, a + 1) + a));
        }
        else
        {
            1;
        }
        if (t < b)
        {
            main(t + 1, b, a);
        }
        else
        {
            3;
        }
        if (main(-94, -27 + t, a) && t == 2)
        {
            if (b < 13)
            {
                r = main(2, b + 1, "%s %d %d\n");
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = main(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(31[a]);
                    }
                    else
                    {
                        r = main(-65, b, a + 1);
                    }
                }
                else
                {
                    r = main((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = main(2, 2, "%s");
            }
            else
            {
                r = *a == '/' || main(0, main(-61, *a, CIPHER), a + 1);
            }
        }
    }
    return r;
}

9) Simplify an Array Access

This change is straight forward. The array access “31[a]” is exactly the same as “a[31]” yet the latter is much easier to understand in the context of the 31 character substitution cipher:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    r = putchar(a[31]);
    ...

10) Add Brackets to Logical Expressions

Personally, I prefer plenty of brackets in compound logical expressions. The following snips are small improvements:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    if (main(-94, -27 + t, a) && (t == 2))
    ...

    ...
    r = ((*a == '/') || (main(0, main(-61, *a, CIPHER), a + 1)));
    ...

11) Remove Misdirection

There are several instances where the implementation is deliberately misleading. Given the reformatted source code so far, it’s easy to see that there are two “else” clauses with no side effects (“else {1;}” and “else {3;}”). Less obvious is that the parameters that look like string formatting specifiers passed to the main function are unused in that iteration of the function and can be replaced with zero:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    if (b < 13)
    {
        r = main(2, b + 1, 0);
    }
    ...

    ...
    r = main(2, 2, 0);
    ...

12) Implement a Stand-Alone Recursive Function

The entire algorithm so far uses the single “main” function in an unusual recursive manner. After the last of the twelve changes, the main function looks like a standard C main function and there’s a separate local-scope (static) recursive function which, in time-honoured programmer’s tradition, is called “foo”. This gives the final complete sample after all twelve changes:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include 

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     "!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:"
    "\nuwloca-O;m .vpbks,fxntdCeghiry";

static const char *LYRICS =
    "@n'+,#'/"  /* On the */
    "*{}w+/"    /* first */
    "w#cdnr/"   /* second */
    "+,{}r/"    /* third */
    "*de}+,/"   /* fourth */
    "*{*+,/"    /* fifth */
    "w{%+,/"    /* sixth */
    "w#q#n+,/"  /* seventh */
    "#{l+,/"    /* eighth */
    "n{n+,/"    /* ninth */
    "+#n+,/"    /* tenth */
    "#;#q#n+,/" /* eleventh */
    "+k#;*+,/"  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    "'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/"

    /* twelve drummers drumming, */
    "+k#;q#'r}eKK#}w'r}eKK{nl]'/"

    /* eleven pipers piping, */
    "#;#q#n'){)#}w'){){nl]'/"

    /* ten lords a-leaping,\n */
    "+#n';d}rw' i;# ){nl]!/"

    /* nine ladies dancing, */
    "n{n#'; r{#w'r nc{nl]'/"

    /* eight maids a-milking, */
    "#{l,+'K {rw' iK{;[{nl]'/"

    /* seven swans a-swimming,\n */
    "w#q#n'wk nw' iwk{KK{nl]!/"

    /* six geese a-laying, */
    "w{%'l##w#' i; :{nl]'/"

    /* five gold rings;\n */
    "*{q#'ld;r'}{nlwb!/"

    /* four calling birds, */
    "*de}'c ;;{nl'-{}rw]'/"

    /* three french hens, */
    "+,}##'*}#nc,',#nw]'/"

    /* two turtle doves\nand */
    "+kd'+e}+;#'rdq#w! nr'/"

    /* a partridge in a pear tree */
    " ') }+}{rl#'{n' ')# }'+}##(!!/";

static int foo(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            foo(-79, -13, a + foo(-87, 1 - b, foo(-86, 0, a + 1) + a));
        }
        if (t < b)
        {
            foo(t + 1, b, a);
        }
        if (foo(-94, -27 + t, a) && (t == 2))
        {
            if (b < 13)
            {
                r = foo(2, b + 1, 0);
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = foo(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(a[31]);
                    }
                    else
                    {
                        r = foo(-65, b, a + 1);
                    }
                }
                else
                {
                    r = foo((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = foo(2, 2, 0);
            }
            else
            {
                r = ((*a == '/') || (foo(0, foo(-61, *a, CIPHER), a + 1)));
            }
        }
    }
    return r;
}

int main(int argc, char *argv[])
{
    return foo(1, 0, 0);
}

There are plenty of changes that could still be made to improve readability, such as more comments and replacing the recursive algorithm with iterative loops… but I’ve made my promised twelve improvements, leaving you with plenty of opportunity to make some of your own.





[1] At the time of writing this blog, the original code can be found on the International Obfuscated C Code Contest website on the “Winning Entries” web page, listed as “phillipps.c” under the 1988 sub-heading.

[2] Credit goes to an analysis by Mike Markowski and to a similar exercise in reformatting the source code by Michael Nahas.

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

During the Christmas period, I was reminded of a now-famous piece of obfuscated C code. This was the winning entry in the 1988 International Obfuscated C Code Contest [see reference 1] and, believe it or not, will print the entire lyrics to “The Twelve Days of Christmas”:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save


#include <stdio.h>
main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,"%s %d %d\n"):9:16:t<0?t<-72?main(_,t,
"@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/")
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,"%s"):*a=='/'||main(0,main(-61,*a,
"!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry"),a+1);
}

There are plenty of articles online that explain how this works; just use your favourite search engine to find out more [see reference 2]. Rather than repeat those descriptions, I’ll stick to the festive theme that prompted this blog and illustrate twelve ways to improve the readability of this wonderful algorithm.

1) Extract the Constant Strings

There are two strings lurking in the source code. The longer string contains an encoded version of the song lyrics and the shorter string contains a substitution cipher to decode the lyrics. The source code is immediately much clearer if these strings are declared near the beginning of the file. In the following example, “static” limits the scope of the strings to one source file and “const” declares them as read-only. I like to use uppercase symbols for constants but that’s nothing more than personal choice. Here’s the modified source code:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include <stdio.h>

static const char *CIPHER =
&quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
&quot;@n'+,#'/*{}w+/w#cdnr/+,{}r/*de}+,/*{*+,/w{%+,/w#q#n+,/#{l+,/n{n+,/+#n+,/#\
;#q#n+,/+k#;*+,/'r :'d*'3,}{w+K w'K:'+}e#';dq#'l \
q#'+d'K#!/+k#;q#'r}eKK#}w'r}eKK{nl]'/#;#q#n'){)#}w'){){nl]'/+#n';d}rw' i;# \
){nl]!/n{n#'; r{#w'r nc{nl]'/#{l,+'K {rw' iK{;[{nl]'/w#q#n'wk nw' \
iwk{KK{nl]!/w{%'l##w#' i; :{nl]'/*{q#'ld;r'}{nlwb!/*de}'c \
;;{nl'-{}rw]'/+,}##'*}#nc,',#nw]'/+kd'+e}+;#'rdq#w! nr'/ ') }+}{rl#'{n' ')# \
}'+}##(!!/&quot;;

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

2) Reformat the Strings

The two strings are recursively indexed to decode and display the lyrics. Reformatting these strings helps to understand the algorithm. In the next step the lyrics and the cipher are declared using C’s alternative string concatenation syntax. The substitution cipher is split after the 31st character, the lyrics are split at each “/” character and the original code’s “\” concatenation in the lyrics string has been eliminated. There’s also some indentation to improve readability:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include 

static const char *CIPHER =
     &quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:&quot;
    &quot;\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
    &quot;@n'+,#'/&quot;
    &quot;*{}w+/&quot;
    &quot;w#cdnr/&quot;
    &quot;+,{}r/&quot;
    &quot;*de}+,/&quot;
    &quot;*{*+,/&quot;
    &quot;w{%+,/&quot;
    &quot;w#q#n+,/&quot;
    &quot;#{l+,/&quot;
    &quot;n{n+,/&quot;
    &quot;+#n+,/&quot;
    &quot;#;#q#n+,/&quot;
    &quot;+k#;*+,/&quot;
    &quot;'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/&quot;
    &quot;+k#;q#'r}eKK#}w'r}eKK{nl]'/&quot;
    &quot;#;#q#n'){)#}w'){){nl]'/&quot;
    &quot;+#n';d}rw' i;# ){nl]!/&quot;
    &quot;n{n#'; r{#w'r nc{nl]'/&quot;
    &quot;#{l,+'K {rw' iK{;[{nl]'/&quot;
    &quot;w#q#n'wk nw' iwk{KK{nl]!/&quot;
    &quot;w{%'l##w#' i; :{nl]'/&quot;
    &quot;*{q#'ld;r'}{nlwb!/&quot;
    &quot;*de}'c ;;{nl'-{}rw]'/&quot;
    &quot;+,}##'*}#nc,',#nw]'/&quot;
    &quot;+kd'+e}+;#'rdq#w! nr'/&quot;
    &quot; ') }+}{rl#'{n' ')# }'+}##(!!/&quot;;

main(t,_,a)
char *a;
{
return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
main(2,_+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(_,t,LYRICS)
:t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
:0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

3) Add Some Comments

All good programmers know that comments are useful. Annotating the constant strings immediately helps understand the algorithm even before any changes are made to the function. I prefer block comments for maximum portability but end-of-line comments would work just as well:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     &quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:&quot;
    &quot;\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
    &quot;@n'+,#'/&quot;  /* On the */
    &quot;*{}w+/&quot;    /* first */
    &quot;w#cdnr/&quot;   /* second */
    &quot;+,{}r/&quot;    /* third */
    &quot;*de}+,/&quot;   /* fourth */
    &quot;*{*+,/&quot;    /* fifth */
    &quot;w{%+,/&quot;    /* sixth */
    &quot;w#q#n+,/&quot;  /* seventh */
    &quot;#{l+,/&quot;    /* eighth */
    &quot;n{n+,/&quot;    /* ninth */
    &quot;+#n+,/&quot;    /* tenth */
    &quot;#;#q#n+,/&quot; /* eleventh */
    &quot;+k#;*+,/&quot;  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    &quot;'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/&quot;

    /* twelve drummers drumming, */
    &quot;+k#;q#'r}eKK#}w'r}eKK{nl]'/&quot;

    /* eleven pipers piping, */
    &quot;#;#q#n'){)#}w'){){nl]'/&quot;

    /* ten lords a-leaping,\n */
    &quot;+#n';d}rw' i;# ){nl]!/&quot;

    /* nine ladies dancing, */
    &quot;n{n#'; r{#w'r nc{nl]'/&quot;

    /* eight maids a-milking, */
    &quot;#{l,+'K {rw' iK{;[{nl]'/&quot;

    /* seven swans a-swimming,\n */
    &quot;w#q#n'wk nw' iwk{KK{nl]!/&quot;

    /* six geese a-laying, */
    &quot;w{%'l##w#' i; :{nl]'/&quot;

    /* five gold rings;\n */
    &quot;*{q#'ld;r'}{nlwb!/&quot;

    /* four calling birds, */
    &quot;*de}'c ;;{nl'-{}rw]'/&quot;

    /* three french hens, */
    &quot;+,}##'*}#nc,',#nw]'/&quot;

    /* two turtle doves\nand */
    &quot;+kd'+e}+;#'rdq#w! nr'/&quot;

    /* a partridge in a pear tree */
    &quot; ') }+}{rl#'{n' ')# }'+}##(!!/&quot;;

4) Indent the Function Contents

This step is straight forward… simply indent the contents of the single function:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

main(t,_,a)
char *a;
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

5) Eliminate Implicit Types

If you’re compiling the examples then you may have noticed that there are plenty of implicit type warnings. The main function uses an old C syntax to declare parameter “a” as pointer-to-char. The remaining parameters are implicit integers and the return type of the main function is also an implicit integer. These are fixed in the next sample while taking the opportunity to include the “const” type qualifier:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int _, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-_,main(-86,0,a+1)+a)):
    1,t<_?main(t+1,_,a):3,main(-94,-27+t,a)&&t==2?_<13?
    main(2,_+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(_,t,LYRICS)
    :t<-50?_==*a?putchar(31[a]):main(-65,_,a+1):main((*a=='/')+t,_,a+1)
    :0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

6) Rename the Underscore Parameter

The second parameter of the main function is a single underscore. While this is perfectly legal in C, it’s a lot harder to read than an alphabetic character. The next sample has replaced all instances of the underscore parameter with the single character “b”:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int b, const char *a)
{
    return!0<t?t<3?main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a)):
    1,t<b?main(t+1,b,a):3,main(-94,-27+t,a)&&t==2?b<13?
    main(2,b+1,&quot;%s %d %d\n&quot;):9:16:t<0?t<-72?main(b,t,LYRICS)
    :t<-50?b==*a?putchar(31[a]):main(-65,b,a+1):main((*a=='/')+t,b,a+1)
    :0<t?main(2,2,&quot;%s&quot;):*a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

7) Reformat the Ternary Conditional Operator

The obfuscated implementation makes extensive use of the ternary conditional operator. The next sample reformats these ternary operators with some additional whitespace:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int b, const char *a)
{
    return
    !0<t
    ?
        t<3
        ?
            main(-79,-13,a+main(-87,1-b,main(-86,0,a+1)+a))
        :
            1, t<b
            ?
                main(t+1,b,a)
            :
                3, main(-94,-27+t,a)&&t==2
                ?
                    b<13
                    ?
                        main(2,b+1,&quot;%s %d %d\n&quot;)
                    :
                        9
                :
                    16
    :
        t<0
        ?
            t<-72
            ?
                main(b,t,LYRICS)
            :
                t<-50
                ?
                    b==*a
                    ?
                        putchar(31[a])
                    :
                        main(-65,b,a+1)
                :
                    main((*a=='/')+t,b,a+1)
        :
            0<t
            ?
                main(2,2,&quot;%s&quot;)
            :
                *a=='/'||main(0,main(-61,*a,CIPHER),a+1);
}

8) Replace the Ternary Conditional Statements with If-Else

A simple replacement of the ternary conditional statements with if-else statements improves the presentation of the algorithm. In the following sample, there is yet more white space and the multiple return points have been rationalised into a single point of return by using a temporary integer variable “r”:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

int main(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            main(-79, -13, a + main(-87, 1 - b, main(-86, 0, a + 1) + a));
        }
        else
        {
            1;
        }
        if (t < b)
        {
            main(t + 1, b, a);
        }
        else
        {
            3;
        }
        if (main(-94, -27 + t, a) && t == 2)
        {
            if (b < 13)
            {
                r = main(2, b + 1, &quot;%s %d %d\n&quot;);
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = main(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(31[a]);
                    }
                    else
                    {
                        r = main(-65, b, a + 1);
                    }
                }
                else
                {
                    r = main((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = main(2, 2, &quot;%s&quot;);
            }
            else
            {
                r = *a == '/' || main(0, main(-61, *a, CIPHER), a + 1);
            }
        }
    }
    return r;
}

9) Simplify an Array Access

This change is straight forward. The array access “31[a]” is exactly the same as “a[31]” yet the latter is much easier to understand in the context of the 31 character substitution cipher:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    r = putchar(a[31]);
    ...

10) Add Brackets to Logical Expressions

Personally, I prefer plenty of brackets in compound logical expressions. The following snips are small improvements:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    if (main(-94, -27 + t, a) && (t == 2))
    ...

    ...
    r = ((*a == '/') || (main(0, main(-61, *a, CIPHER), a + 1)));
    ...

11) Remove Misdirection

There are several instances where the implementation is deliberately misleading. Given the reformatted source code so far, it’s easy to see that there are two “else” clauses with no side effects (“else {1;}” and “else {3;}”). Less obvious is that the parameters that look like string formatting specifiers passed to the main function are unused in that iteration of the function and can be replaced with zero:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

    ...
    if (b < 13)
    {
        r = main(2, b + 1, 0);
    }
    ...

    ...
    r = main(2, 2, 0);
    ...

12) Implement a Stand-Alone Recursive Function

The entire algorithm so far uses the single “main” function in an unusual recursive manner. After the last of the twelve changes, the main function looks like a standard C main function and there’s a separate local-scope (static) recursive function which, in time-honoured programmer’s tradition, is called “foo”. This gives the final complete sample after all twelve changes:

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

#include 

static const char *CIPHER =
    /*
    * Substitution cipher:
    * Replace '!' with '\n'
    * Replace 'e' with 'u'
    * ...
    * Replace ':' with 'y'
    */
     &quot;!ek;dc i@bK'(q)-[w]*%n+r3#l,{}:&quot;
    &quot;\nuwloca-O;m .vpbks,fxntdCeghiry&quot;;

static const char *LYRICS =
    &quot;@n'+,#'/&quot;  /* On the */
    &quot;*{}w+/&quot;    /* first */
    &quot;w#cdnr/&quot;   /* second */
    &quot;+,{}r/&quot;    /* third */
    &quot;*de}+,/&quot;   /* fourth */
    &quot;*{*+,/&quot;    /* fifth */
    &quot;w{%+,/&quot;    /* sixth */
    &quot;w#q#n+,/&quot;  /* seventh */
    &quot;#{l+,/&quot;    /* eighth */
    &quot;n{n+,/&quot;    /* ninth */
    &quot;+#n+,/&quot;    /* tenth */
    &quot;#;#q#n+,/&quot; /* eleventh */
    &quot;+k#;*+,/&quot;  /* twelfth */

    /* day of Christmas my true love gave to me\n */
    &quot;'r :'d*'3,}{w+K w'K:'+}e#';dq#'l q#'+d'K#!/&quot;

    /* twelve drummers drumming, */
    &quot;+k#;q#'r}eKK#}w'r}eKK{nl]'/&quot;

    /* eleven pipers piping, */
    &quot;#;#q#n'){)#}w'){){nl]'/&quot;

    /* ten lords a-leaping,\n */
    &quot;+#n';d}rw' i;# ){nl]!/&quot;

    /* nine ladies dancing, */
    &quot;n{n#'; r{#w'r nc{nl]'/&quot;

    /* eight maids a-milking, */
    &quot;#{l,+'K {rw' iK{;[{nl]'/&quot;

    /* seven swans a-swimming,\n */
    &quot;w#q#n'wk nw' iwk{KK{nl]!/&quot;

    /* six geese a-laying, */
    &quot;w{%'l##w#' i; :{nl]'/&quot;

    /* five gold rings;\n */
    &quot;*{q#'ld;r'}{nlwb!/&quot;

    /* four calling birds, */
    &quot;*de}'c ;;{nl'-{}rw]'/&quot;

    /* three french hens, */
    &quot;+,}##'*}#nc,',#nw]'/&quot;

    /* two turtle doves\nand */
    &quot;+kd'+e}+;#'rdq#w! nr'/&quot;

    /* a partridge in a pear tree */
    &quot; ') }+}{rl#'{n' ')# }'+}##(!!/&quot;;

static int foo(int t, int b, const char *a)
{
    int r;
    if (!0 < t)
    {
        if (t < 3)
        {
            foo(-79, -13, a + foo(-87, 1 - b, foo(-86, 0, a + 1) + a));
        }
        if (t < b)
        {
            foo(t + 1, b, a);
        }
        if (foo(-94, -27 + t, a) && (t == 2))
        {
            if (b < 13)
            {
                r = foo(2, b + 1, 0);
            }
            else
            {
                r = 9;
            }
        }
        else
        {
            r = 16;
        }
    }
    else
    {
        if (t < 0)
        {
            if (t < -72)
            {
                r = foo(b, t, LYRICS);
            }
            else
            {
                if (t < -50)
                {
                    if (b == *a)
                    {
                        r = putchar(a[31]);
                    }
                    else
                    {
                        r = foo(-65, b, a + 1);
                    }
                }
                else
                {
                    r = foo((*a == '/') + t, b, a + 1);
                }
            }
        }
        else
        {
            if (0 < t)
            {
                r = foo(2, 2, 0);
            }
            else
            {
                r = ((*a == '/') || (foo(0, foo(-61, *a, CIPHER), a + 1)));
            }
        }
    }
    return r;
}

int main(int argc, char *argv[])
{
    return foo(1, 0, 0);
}

There are plenty of changes that could still be made to improve readability, such as more comments and replacing the recursive algorithm with iterative loops… but I’ve made my promised twelve improvements, leaving you with plenty of opportunity to make some of your own.





[1] At the time of writing this blog, the original code can be found on the International Obfuscated C Code Contest website on the “Winning Entries” web page, listed as “phillipps.c” under the 1988 sub-heading.

[2] Credit goes to an analysis by Mike Markowski and to a similar exercise in reformatting the source code by Michael Nahas.

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save

Save