Code that uses an iterate interface. Time complexity is O(n^2), Space complexity has an overhead of: copy of n (log n bits), an iteration variable (log n bits), keeping track of n-i (log n bits), , copy of current value (log n bits), copy of p (n log n bits), creation of next value (log n bits), and a bit set of used values (n bits). You can't avoid an overhead of n log n bits. Timewise, this is also O(n^2), for setting the bits. This can be reduced a bit, but at the cost of using a decorated tree to store the used values.
This can be altered to use arbitrary precision integers and bit sets by using calls to the appropriate libraries instead, and the above bounds will actually start to kick in, rather than being capped at N=8, portably (an int can be the same as a short, and as small as 16 bits). 9! = 362880 > 65536 = 2^16
#include <math.h>
#include <stdio.h>
typedef signed char index_t;
typedef unsigned int permutation;
static index_t permutation_next(index_t n, permutation p, index_t value)
{
permutation used = 0;
for (index_t i = 0; i < n; ++i) {
index_t left = n - i;
index_t digit = p % left;
p /= left;
for (index_t j = 0; j <= digit; ++j) {
if (used & (1 << j)) {
digit++;
}
}
used |= (1 << digit);
if (value == -1) {
return digit;
}
if (value == digit) {
value = -1;
}
}
/* value not found */
return -1;
}
static void dump_permutation(index_t n, permutation p)
{
index_t value = -1;
fputs("[", stdout);
value = permutation_next(n, p, value);
while (value != -1) {
printf("%d", value);
value = permutation_next(n, p, value);
if (value != -1) {
fputs(", ", stdout);
}
}
puts("]");
}
static int factorial(int n)
{
int prod = 1;
for (int i = 1; i <= n; ++i) {
prod *= i;
}
return prod;
}
int main(int argc, char **argv)
{
const index_t n = 4;
const permutation max = factorial(n);
for (permutation p = 0; p < max; ++p) {
dump_permutation(n, p);
}
}