// OptimalPermute.cpp : Defines the entry point for the console application. // Copyright (C) April 30, 2011 - George W. Taylor - All rights reserved // This console app takes as the command line argument the number of digits in // the desired set. The permutations will be bit-packed into nibbles. // This version employs an optimized Difference Table rather than store the // permutations themselves. The starting set and final permutation will be displayed. // All permutations of the desired set will be generated using the algorithm discussed // in "A Permutation on Combinatorial Algorithms" in a bit-packed 64 bit integer. // Each permutation will be verified against another set permuted independently via // conventional means if desired. // This is intended to be compiled as a 64 bit application. // A 13 digit set (0,1,2,3,4,5,6,7,8,9,a,b,c) requires 1 to 11 Gb // of memory, and is the largest set that most common platforms // can do, because the next size up, 14 digits, would require // 14 times as much memory, ~24 Gb. 13 digits have 6,227,020,800 // possible permutations, the factorial of 13. This takes just // 5.4 minutes to compute with an Intel i5 CPU under Win 7 // 64 bit. Of course this code is far from optimal, having been // developed with the sole objective of illustrating the algorithm. // This version will allocate as much memory as possible for cases // where required table size exceeds RAM available on the platform. // Set MAX_GIGABYTES to limit memory such that you leave about 2 Gb // for the OS. It doesn't run any faster if you try to push for every // last bit of memory, and if you ask for too much some of it will // end up getting swapped out to disk and slow things down dramatically. // On my Win 7 64 bit platform with this compiled as a 64 bit release // version, the optimized table will generate the permutations of a 13 // digit set in 5.4 minutes, as compared to the normal table's 6.8 minutes. // Compare those times to the 8.1 minutes required using the permutation // generator alone, without the help of the meta-algorithm. // See http://www.tropicalcoder.com/APermutationOnCombinatorialAlgorithms.htm #include #include #include #include #include #include #include using namespace std; #define MAX_GIGABYTES 6 #define GIGABYTE (1024 * 1024 * 1024) #define HALF_GIGABYTE (GIGABYTE / 2) #define QUARTER_GIGABYTE (HALF_GIGABYTE / 2) #define MAX_DIGITS 16 #define MIN_DIGITS 4 #define MAX_OFFSETS 14 #define USE_TIMER // #define VALIDATE // uncomment this to enable validation #define MAX_SUBSETS 256 #define MAX_STACK (1024 * 40) // * 40 for 13 digits #define MAX_SUPPLEMENTAL (1024 * 32) // * 16 for 13 digits UINT64 *SupplementalTable; UINT64 *Factorials = NULL; UINT64 *Offsets = NULL; DWORD *DiffTable = NULL; UINT64 indexTable, maxTableIndex, countPermutations, maxPermutations; UINT64 previousPermutation, currentPermutation, countGenerated = 0; UINT64 validator, countSingletons = 0; UINT64 countExceededDWORD = 0; DWORD indexSupplementalTable; LPBYTE subsetStack; int stackSize; BYTE maxOffset; void GeneratePermutations(UINT64 count, BYTE setSize); void ShowPermutations(UINT64 *Permutations, UINT64 nCountPermutations, BYTE setSize); #ifdef USE_TIMER // #define NANOSECS // Timer delivers nanosecs with this defined - #undef this if you want time in microsecs LARGE_INTEGER prevPerformanceCount, PerformanceCount, Frequency; void TimerInit(void) { prevPerformanceCount.LowPart = 0; prevPerformanceCount.HighPart = 0; PerformanceCount.LowPart = 0; PerformanceCount.HighPart = 0; Frequency.LowPart = 0; Frequency.HighPart = 0; QueryPerformanceFrequency(&Frequency); } void TimerStart(void) { QueryPerformanceCounter(&prevPerformanceCount); } void TimerEnd(void) { QueryPerformanceCounter(&PerformanceCount); } UINT64 TimerGetInterval(void) { UINT64 ticks; long double time; ticks = PerformanceCount.LowPart - prevPerformanceCount.LowPart; #ifdef NANOSECS time = (long double)ticks / (long double)Frequency.LowPart) * 1000000000; #else time = ((long double)ticks / (long double)Frequency.LowPart) * 1000000; #endif return (UINT64)floor(time); } #endif // Get the factorial of NumDigits >= 3 UINT64 Factorial(unsigned NumDigits) { UINT64 factorial = 2; for(unsigned n = 3; n <= NumDigits; n++) { factorial *= n; } return factorial; } void GetFactorials(unsigned setSize) { Factorials = (UINT64*) new BYTE [sizeof(UINT64) * (setSize + 1)]; ZeroMemory(Factorials, sizeof(UINT64) * (setSize + 1)); for(unsigned i = 3; i <= setSize; i++) { Factorials[i] = Factorial(i); } } UINT64 allocateMemory(void) { UINT64 size = GIGABYTE; size *= MAX_GIGABYTES; while(size) { DiffTable = NULL; try { DiffTable = (DWORD*) new BYTE [size]; } catch(bad_alloc&) { size -= QUARTER_GIGABYTE; continue; } break; } return size; } UINT64 allocateMaxTableMemory(void) { UINT64 tableSize; // How much memory can we get? maxTableIndex = allocateMemory(); int gigs = (int)(maxTableIndex / GIGABYTE); int frac = 5 * (int)((maxTableIndex % GIGABYTE) / HALF_GIGABYTE); printf("\nTable memory allocated: %I64d\n", maxTableIndex); printf("Gigabytes of Table memory allocated: %d.%d\n", gigs, frac); tableSize = maxTableIndex / sizeof(DWORD); printf("Table size: %I64d\n", tableSize); ZeroMemory(DiffTable, sizeof(DWORD) * tableSize); return tableSize; } void GeneratePermutation(UINT64 *pCurrentPermutation, BYTE setSize) { UINT64 p, mask; UINT64 shiftA, shiftB; p = *pCurrentPermutation; // Find the largest index k such that a[k] < a[k + 1]. mask = 0xF; short k, l, largestIndex = -1; for(k = (setSize - 2); k >= 0; k--) { // if(pTable[k] < pTable[k+1]) shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - (k+1)) * 4; if( ((p >> shiftA) & mask) < ((p >> shiftB) & mask) ) { if(k > largestIndex)largestIndex = k; } } // If no such index exists, the permutation is the last permutation. if(largestIndex == -1)return; // Find the largest index l such that a[k] < a[l]. k = largestIndex; largestIndex = -1; shiftA = ((setSize - 1) - k) * 4; for(l = (setSize - 1); l >= 0; l--) { // if(pTable[k] < pTable[l]) shiftB = ((setSize - 1) - l) * 4; if( ((p >> shiftA) & mask) < ((p >> shiftB) & mask) ) { if(l > largestIndex)largestIndex = l; } } // Since k + 1 is such an index, l is well defined and satisfies k < l. // Swap a[k] with a[l]. l = largestIndex; // BYTE tmp = pTable[k]; shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - l) * 4; UINT64 tmpA = ((p >> shiftA) & mask); UINT64 tmpB = ((p >> shiftB) & mask); // pTable[k] = pTable[l]; mask <<= shiftA; p |= mask; p ^= mask; p |= (tmpB << shiftA); // pTable[l] = tmp; mask = 0xF; mask <<= shiftB; p |= mask; p ^= mask; p |= (tmpA << shiftB); // Reverse the sequence from a[k + 1] up to and including the final element a[n]. ++k; l = setSize - 1; while(l > k) { shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - l) * 4; // tmp = pTable[k]; mask = 0xF; tmpA = ((p >> shiftA) & mask); tmpB = ((p >> shiftB) & mask); // pTable[k] = pTable[l]; mask <<= shiftA; p |= mask; p ^= mask; p |= (tmpB << shiftA); // pTable[l] = tmp; mask = 0xF; mask <<= shiftB; p |= mask; p ^= mask; p |= (tmpA << shiftB); ++k; --l; } *pCurrentPermutation = p; } // This function will verify each permutation generated // by checking it against an independently generated permutation. // 'validator' must initialized with the same set before we begin. // This function must be called every single time a new permutation // is created to stay in sync. bool ValidatePermutation(UINT64 permutation, BYTE setSize) { UINT64 mask = 0xF; GeneratePermutation(&validator, setSize); if(permutation != validator) { printf("Error: Permuation is invalid!\n"); ShowPermutations(&validator, 1, setSize); return false; } return true; } // Called only for digits >= 4 UINT64 getOffset(BYTE digits, UINT64 indexTable) { if(Offsets[digits] == 0) { Offsets[digits] = indexTable - 2; // printf("Created table for %d digits\n", digits); if(digits == 3) { printf("Added to table %d\n", 3); }else { printf("Added to table %I64d\n", Offsets[digits] - Offsets[digits - 1]); } } return Offsets[digits]; } // Add differences backwards through the table void addDifferences(UINT64 count, UINT64 offset, BYTE setSize) { for(UINT64 n = 0; n < count; n++) { if((DiffTable[offset] & 0x0000FFFF) == 0) { // The difference is to be found in the supplimental table currentPermutation += SupplementalTable[DiffTable[offset] >> 16]; }else { currentPermutation += DiffTable[offset]; } #ifdef VALIDATE if(!ValidatePermutation(currentPermutation, setSize)) { ShowPermutations(¤tPermutation, 1, setSize); break; } #endif previousPermutation = currentPermutation; ++countPermutations; --offset; } } // Add differences while advancing through the table void topHalf(UINT64 count, UINT64 offset, BYTE setSize) { UINT64 n; for(n = 0; n < count; n++) { if((DiffTable[offset] & 0x0000FFFF) == 0) { // The difference is to be found in the supplimental table currentPermutation += SupplementalTable[DiffTable[offset] >> 16]; }else { currentPermutation += DiffTable[offset]; } #ifdef VALIDATE if(!ValidatePermutation(currentPermutation, setSize)) { ShowPermutations(¤tPermutation, 1, setSize); break; } #endif previousPermutation = currentPermutation; ++countPermutations; ++offset; } } // First read the table advancing from beginning, // then read it backwards. We are taking advantage of the // symmetry of each set of differences. We only need to store // half the set + 1 to replicate the entire set. void addDiffs(UINT64 count, UINT64 offset, BYTE setSize) { UINT64 n; for(n = 0; n < ((count/2) + 1); n++) { if((DiffTable[offset] & 0x0000FFFF) == 0) { // The difference is to be found in the supplimental table currentPermutation += SupplementalTable[DiffTable[offset] >> 16]; }else { currentPermutation += DiffTable[offset]; } #ifdef VALIDATE if(!ValidatePermutation(currentPermutation, setSize)) { ShowPermutations(¤tPermutation, 1, setSize); break; } #endif previousPermutation = currentPermutation; ++countPermutations; ++offset; } offset -= 2; for(n = 0; n < (count/2); n++) { if((DiffTable[offset] & 0x0000FFFF) == 0) { // The difference is to be found in the supplimental table currentPermutation += SupplementalTable[DiffTable[offset] >> 16]; }else { currentPermutation += DiffTable[offset]; } #ifdef VALIDATE if(!ValidatePermutation(currentPermutation, setSize)) { break; } #endif previousPermutation = currentPermutation; ++countPermutations; --offset; } } // Calculate the offset for bottom of subset in the table // and add those differences via topHalf() void GetTopHalfFromTable(UINT64 count, BYTE digits, BYTE setSize) { UINT64 offset; // We already have this in the table if(digits == 3) { // count = 3; offset = 1; }else { if(count != (getOffset(digits, indexTable) - getOffset(digits - 1, indexTable))) { // We never enter here. This code is more for illustrative purposes, // showing another way of calculating num differences stored for each subset. printf("Discrepancy in count!\n"); } offset = 2 + getOffset(digits, indexTable) - count; } topHalf(count, offset, setSize); } // Writes 'count' differences to global DiffTable[indexTable] void GeneratePermutations(UINT64 count, BYTE setSize) { UINT64 mask; UINT64 shiftA, shiftB; for(UINT64 i = 0; i < count; i++) { // Find the largest index k such that a[k] < a[k + 1]. mask = 0xF; short k, l, largestIndex = -1; for(k = (setSize - 2); k >= 0; k--) { // if(pTable[k] < pTable[k+1]) shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - (k+1)) * 4; if( ((currentPermutation >> shiftA) & mask) < ((currentPermutation >> shiftB) & mask) ) { if(k > largestIndex)largestIndex = k; } } // If no such index exists, the permutation is the last permutation. if(largestIndex == -1)break; // Find the largest index l such that a[k] < a[l]. k = largestIndex; largestIndex = -1; shiftA = ((setSize - 1) - k) * 4; for(l = (setSize - 1); l >= 0; l--) { // if(pTable[k] < pTable[l]) shiftB = ((setSize - 1) - l) * 4; if( ((currentPermutation >> shiftA) & mask) < ((currentPermutation >> shiftB) & mask) ) { if(l > largestIndex)largestIndex = l; } } // Since k + 1 is such an index, l is well defined and satisfies k < l. // Swap a[k] with a[l]. l = largestIndex; // BYTE tmp = pTable[k]; shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - l) * 4; UINT64 tmpA = ((currentPermutation >> shiftA) & mask); UINT64 tmpB = ((currentPermutation >> shiftB) & mask); // pTable[k] = pTable[l]; mask <<= shiftA; currentPermutation |= mask; currentPermutation ^= mask; currentPermutation |= (tmpB << shiftA); // pTable[l] = tmp; mask = 0xF; mask <<= shiftB; currentPermutation |= mask; currentPermutation ^= mask; currentPermutation |= (tmpA << shiftB); // Reverse the sequence from a[k + 1] up to and including the final element a[n]. ++k; l = setSize - 1; while(l > k) { shiftA = ((setSize - 1) - k) * 4; shiftB = ((setSize - 1) - l) * 4; // tmp = pTable[k]; mask = 0xF; tmpA = ((currentPermutation >> shiftA) & mask); tmpB = ((currentPermutation >> shiftB) & mask); // pTable[k] = pTable[l]; mask <<= shiftA; currentPermutation |= mask; currentPermutation ^= mask; currentPermutation |= (tmpB << shiftA); // pTable[l] = tmp; mask = 0xF; mask <<= shiftB; currentPermutation |= mask; currentPermutation ^= mask; currentPermutation |= (tmpA << shiftB); ++k; --l; } #ifdef VALIDATE if(!ValidatePermutation(currentPermutation, setSize)) { break; } #endif if(count > 1) // We don't store the "singletons" in the table (count = 1) { if(indexTable < maxTableIndex) { // using 'mask' as a temporary variable rather than create yet another with more appropriate name mask = currentPermutation - previousPermutation; if((mask & 0xFFFF0000) == mask) { // The memory scheme depends on that fact that there are no differences // who's lo word is zero, and that there are < 65535 diffs that exceed a DWORD. printf("Can't use this memory scheme with this set!\n"); // Doesn't happen break; } if(mask > 0xFFFFFFFF) { // This difference exceeds a DWORD! ++countExceededDWORD; if(indexSupplementalTable < MAX_SUPPLEMENTAL) { // Index is stored in hi word of difference DiffTable[indexTable++] = indexSupplementalTable << 16; // and the difference is stored in the supplimental table. SupplementalTable[indexSupplementalTable++] = mask; }else break; }else DiffTable[indexTable++] = (DWORD)mask; } } previousPermutation = currentPermutation; ++countPermutations; ++countGenerated; } } UINT64 getUpperBodyCount(BYTE digits) { // How many permutations required for current subset? UINT64 count = Factorials[digits]/2; if(digits > 3)++count; // From this subtract what we already have from previous subsets... int numDigits = digits; while(--numDigits >= 3) { count -= Factorials[numDigits]; } return count; } // If the table is less than full size, we use the differences we have // and the permuation generator to do the rest of the job void CompleteWithPartialTable(UINT64 count, UINT64 remain, BYTE digits, BYTE setSize) { printf("Final count required: %I64d\n", count); count -= remain; printf("Not in table, digits: %d, count: %I64d\n", digits, count); GeneratePermutations(count, setSize); printf("In table, digits: %d, count: %I64d\n", digits, indexTable-1); addDifferences(remain, indexTable-1, setSize); } // Get the permutations for the central portion of // the set, and add to the the reflection of the entire subset, // who's makeup is determined from the stack. void GetBodyCurrentSubset(BYTE digits, BYTE setSize) { UINT64 offset, count = getUpperBodyCount(digits); if(Offsets[digits] != 0) { // We already have this in the table GetTopHalfFromTable(count, digits, setSize); }else { // Generate the permutations for upper half of main body GeneratePermutations(count, setSize); } // Each subset size is saved on the stack so that // we might know how to reconstruct it as a reflection. subsetStack[stackSize++] = digits; // We just completed the upper half of a set or subset. // We need the reflection of the entire thing, // for which count = Factorials[digits]/2-1. // In previous versions it would all be in the table, // but now we have to create it piece by piece // using the subset fragments stored in table. // Doing bottom half of body count -= 1; offset = getOffset(digits, indexTable); // Was: offset = indexTable - 2; // We may use a smaller table that only stores the differences // for a minimum of setSize-1. Check here if that is the case... if(digits == setSize) { // How many do we have in the table? UINT64 remain = (offset - getOffset(digits - 1, indexTable)); if(remain < count) { CompleteWithPartialTable(count, remain, digits, setSize); }else addDifferences(count, offset, setSize); }else { // We have the full table of differences addDifferences(count, offset, setSize); } // Now do the remainder, the reflection of all the subsets that nake up this set if(digits > 3) { int i; // Go back to the first instance of current set for(i = 0; i < stackSize; i++) { if(subsetStack[i] == digits)break; } // We begin with the set before that, // since we just did the body of current set above while(--i >= 0) // Go on back through the subsets in reverse order { BYTE subset = subsetStack[i]; if(digits < setSize) { // We add these to the stack as well, // such that the entire structure is elaborated // for future reference. subsetStack[stackSize++] = subset; // We must ensure that the stack is big enough. // Make stack bigger if necessary. if(stackSize == MAX_STACK)return; } if(subset == 1) { // I call this a "singleton" - an unique // permutation that is emitted between subsets. GeneratePermutations(1, setSize); }else { // regenerate the body of this subset count = getUpperBodyCount(subset); GetTopHalfFromTable(count, subset, setSize); count -= 1; offset = getOffset(subset, indexTable); addDifferences(count, offset, setSize); } } } if(digits == setSize) { return; // Done! } // Did we need bigger arrays? if(stackSize >= MAX_STACK)return; // done in if(indexSupplementalTable >= MAX_SUPPLEMENTAL)return; if(digits > 3) { // Emit a "singleton" between each subset > 3 GeneratePermutations(1, setSize); subsetStack[stackSize++] = 1; ++countSingletons; } } // The Algorithm void recurseTree(BYTE digits, BYTE setSize) { if(digits > 3) { recurseTree(digits-1, setSize); } GetBodyCurrentSubset(digits, setSize); if(stackSize >= MAX_STACK) { printf("\nExceeded Stack size!\n"); return; } if(indexSupplementalTable >= MAX_SUPPLEMENTAL) { printf("\nExceeded SupplementalTable size!\n"); return; } if(digits > 3) { // Are we done yet? if(countPermutations >= maxPermutations)return; if(stackSize >= MAX_STACK)return; if(indexSupplementalTable >= MAX_SUPPLEMENTAL)return; recurseTree(digits-1, setSize); } } void ShowPermutations(UINT64 *Permutations, UINT64 nCountPermutations, BYTE setSize) { UINT64 mask; mask = 0xF; for(UINT64 i = 0; i < nCountPermutations; i++) { for(BYTE j = 0; j < setSize; j++) { UINT64 shift = ((setSize - 1) - j) * 4; printf("%x", (BYTE)((Permutations[i] >> shift) & mask)); } printf("\n"); } } int main(int argc, char* argv[]) { UINT64 time, minTableSize, maxTableSize, tableSize; UINT64 val, shift; BYTE tmp, setSize; bool bBadInput = false; DiffTable = NULL; subsetStack = NULL; SupplementalTable = NULL; Offsets = NULL; if(argc < 2) { bBadInput = true; goto ERR_OUT; } printf("Set size: %s\n", argv[1]); setSize = atoi(argv[1]); if((setSize < MIN_DIGITS) || (setSize > MAX_DIGITS)) { bBadInput = true; goto ERR_OUT; } GetFactorials(setSize); subsetStack = new BYTE [MAX_STACK]; ZeroMemory(subsetStack, MAX_STACK); stackSize = 0; SupplementalTable = new UINT64 [MAX_SUPPLEMENTAL]; indexSupplementalTable = 0; Offsets = new UINT64 [setSize + 1]; ZeroMemory(Offsets, sizeof(UINT64) * (setSize + 1)); maxOffset = setSize + 1; // How big a table do we need? // Note: amounts to about 45% of Factorials[setSize] tableSize = 0; for(tmp = setSize; tmp >= 3; tmp--) { tableSize += getUpperBodyCount(tmp); } tableSize += 1; minTableSize = tableSize - getUpperBodyCount(setSize); maxTableSize = tableSize; // What is this in Gb? val = minTableSize; val *= sizeof(DWORD); if((val % GIGABYTE) != 0) { val /= GIGABYTE; ++val; }else val /= GIGABYTE; if(val) { printf("Min Gb of table memory required: %d\n", (int)val); } // What is this in Gb? val = tableSize; val *= sizeof(DWORD); if((val % GIGABYTE) != 0) { val /= GIGABYTE; ++val; }else val /= GIGABYTE; if(val) { printf("Max Gb of table memory required: %d\n", (int)val); } if(val > MAX_GIGABYTES) { tableSize = allocateMaxTableMemory(); // table size in dwords returned if(tableSize < minTableSize) { printf("Insufficient memory for table for this set.\n"); goto ERR_OUT; } if(tableSize > maxTableSize) { // We cannot use a table that is bigger than required tableSize = maxTableSize; printf("Allocated more memory for table than necessary!\n"); } }else { try { DiffTable = new DWORD [tableSize]; } catch (bad_alloc&) { printf("Insufficient memory for table for this set.\n", setSize); goto ERR_OUT; } } // Generate the set in ascending order val = currentPermutation = 0; for(tmp = 0; tmp < setSize; tmp++) { shift = ((setSize - 1) - tmp) * 4; currentPermutation |= (val << shift); ++val; printf("%x ", tmp); } printf("\n\n"); maxPermutations = Factorials[setSize]; previousPermutation = currentPermutation; validator = currentPermutation; DiffTable[0] = 0; // This location is not used. indexTable = 1; countPermutations = 1; maxTableIndex = tableSize; //************************************** #ifdef USE_TIMER TimerInit(); TimerStart(); #endif recurseTree(setSize, setSize); // Compare with time required to generate permutations without // the help of this algorithm by running this instead... // GeneratePermutations(maxPermutations, setSize); #ifdef USE_TIMER TimerEnd(); time = TimerGetInterval(); #endif // Show last permuatation ShowPermutations(¤tPermutation, 1, setSize); printf("\nCount permutations expected: %I64d\n", maxPermutations); printf("Count permutations created: %I64d\n", countPermutations); // Show count of permutations that had to be laborously generated // as opposed to using the addition of differences printf("Count permutations generated: %I64d\n", countGenerated); printf("Table size used: %I64d\n", indexTable); printf("stackSize: %i\n", stackSize); printf("countExceededDWORD: %I64d\n", countExceededDWORD); printf("in SupplementalTable: %I64d\n", indexSupplementalTable); printf("Count singletons: %I64d\n", countSingletons); #ifdef USE_TIMER printf("\nMinutes x 10: %I64d\n", time/6000000); printf("Seconds: %I64d\n", time/1000000); printf("Microseconds: %I64d\n", time); #endif ERR_OUT: if(bBadInput) { printf("Usage: Supply number digits in set to permute %d thru %d as command line arugument\n", MIN_DIGITS, MAX_DIGITS); } if(DiffTable != NULL) delete [] DiffTable; if(subsetStack != NULL) delete [] subsetStack; if(SupplementalTable != NULL) delete SupplementalTable; if(Offsets != NULL) delete Offsets; printf("\nAny key exits\n"); while(!_kbhit()); return 0; }