Gael Guennebaud | 931027f | 2010-06-26 23:15:06 +0200 | [diff] [blame] | 1 | |
| 2 | #define EIGEN_INTERNAL_DEBUG_CACHE_QUERY |
| 3 | #include <iostream> |
| 4 | #include "../Eigen/Core" |
| 5 | |
| 6 | using namespace Eigen; |
| 7 | using namespace std; |
| 8 | |
// DUMP_CPUID(CODE): calls EIGEN_CPUID(abcd, CODE, 0) on a zero-initialised
// 4-int buffer and prints the four resulting values in hexadecimal on one line.
//
// Each value is printed as an unsigned 32-bit quantity. Going through a
// pointer cast to get hex output would sign-extend values with bit 31 set on
// targets where pointers are wider than int. The stream is put back into
// decimal / noshowbase mode afterwards so later output is unaffected.
//
// Wrapped in do { } while(0) so that `DUMP_CPUID(x);` is a single statement.
#define DUMP_CPUID(CODE) do {\
    int abcd[4]; \
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\
    EIGEN_CPUID(abcd, CODE, 0); \
    std::cout << "The code " << CODE << " gives " \
              << std::hex << std::showbase \
              << static_cast<unsigned int>(abcd[0]) << " " << static_cast<unsigned int>(abcd[1]) << " " \
              << static_cast<unsigned int>(abcd[2]) << " " << static_cast<unsigned int>(abcd[3]) << " " \
              << std::dec << std::noshowbase << std::endl; \
  } while(0)
| 17 | |
| 18 | int main() |
| 19 | { |
| 20 | cout << "Eigen's L1 = " << ei_queryL1CacheSize() << endl; |
| 21 | cout << "Eigen's L2/L3 = " << ei_queryTopLevelCacheSize() << endl; |
| 22 | int l1, l2, l3; |
| 23 | ei_queryCacheSizes(l1, l2, l3); |
| 24 | cout << "Eigen's L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; |
| 25 | |
| 26 | #ifdef EIGEN_CPUID |
| 27 | |
| 28 | ei_queryCacheSizes_intel(l1, l2, l3); |
| 29 | cout << "Eigen's intel L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; |
| 30 | ei_queryCacheSizes_amd(l1, l2, l3); |
| 31 | cout << "Eigen's amd L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; |
| 32 | |
| 33 | int abcd[4]; |
| 34 | int string[8]; |
| 35 | char* string_char = (char*)(string); |
| 36 | |
| 37 | // vendor ID |
| 38 | EIGEN_CPUID(abcd,0x0,0); |
| 39 | string[0] = abcd[1]; |
| 40 | string[1] = abcd[3]; |
| 41 | string[2] = abcd[2]; |
| 42 | string[3] = 0; |
Gael Guennebaud | 5e7bd96 | 2010-06-26 23:37:42 +0200 | [diff] [blame^] | 43 | cout << endl; |
Gael Guennebaud | 931027f | 2010-06-26 23:15:06 +0200 | [diff] [blame] | 44 | cout << "vendor id = " << string_char << endl; |
Gael Guennebaud | 5e7bd96 | 2010-06-26 23:37:42 +0200 | [diff] [blame^] | 45 | cout << endl; |
Gael Guennebaud | 931027f | 2010-06-26 23:15:06 +0200 | [diff] [blame] | 46 | |
| 47 | // dump Intel direct method |
| 48 | { |
| 49 | l1 = l2 = l3 = 0; |
| 50 | int cache_id = 0; |
| 51 | int cache_type = 0; |
| 52 | do { |
| 53 | abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; |
| 54 | EIGEN_CPUID(abcd,0x4,cache_id); |
| 55 | cache_type = (abcd[0] & 0x0F) >> 0; |
| 56 | int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5] |
| 57 | int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22] |
| 58 | int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12] |
| 59 | int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0] |
| 60 | int sets = (abcd[2]); // C[31:0] |
| 61 | int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1); |
| 62 | |
| 63 | cout << "cache[" << cache_id << "].type = " << cache_type << "\n"; |
| 64 | cout << "cache[" << cache_id << "].level = " << cache_level << "\n"; |
| 65 | cout << "cache[" << cache_id << "].ways = " << ways << "\n"; |
| 66 | cout << "cache[" << cache_id << "].partitions = " << partitions << "\n"; |
| 67 | cout << "cache[" << cache_id << "].line_size = " << line_size << "\n"; |
| 68 | cout << "cache[" << cache_id << "].sets = " << sets << "\n"; |
| 69 | cout << "cache[" << cache_id << "].size = " << cache_size << "\n"; |
| 70 | |
| 71 | cache_id++; |
| 72 | } while(cache_type>0); |
| 73 | } |
Gael Guennebaud | 5e7bd96 | 2010-06-26 23:37:42 +0200 | [diff] [blame^] | 74 | |
| 75 | // manual method for intel |
| 76 | { |
| 77 | l1 = l2 = l3 = 0; |
| 78 | abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; |
| 79 | EIGEN_CPUID(abcd,0x00000002,0); |
| 80 | unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2; |
| 81 | for(int i=0; i<14; ++i) |
| 82 | { |
| 83 | switch(bytes[i]) |
| 84 | { |
| 85 | case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines |
| 86 | case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines |
| 87 | case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines |
| 88 | case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) |
| 89 | case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) |
| 90 | case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines |
| 91 | case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines |
| 92 | // 56h L0 data TLB, 4M pages, 4 ways, 16 entries |
| 93 | // 57h L0 data TLB, 4K pages, 4 ways, 16 entries |
| 94 | // 59h L0 data TLB, 4K pages, fully, 16 entries |
| 95 | case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored |
| 96 | case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored |
| 97 | case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored |
| 98 | case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored |
| 99 | // 77h code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64) |
| 100 | // 96h data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64) |
| 101 | |
| 102 | |
| 103 | case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64) |
| 104 | case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored |
| 105 | case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored |
| 106 | case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored |
| 107 | case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored |
| 108 | case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored |
| 109 | case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored |
| 110 | case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored |
| 111 | case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored |
| 112 | case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored |
| 113 | case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored |
| 114 | case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core) |
| 115 | case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines |
| 116 | case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines |
| 117 | case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines |
| 118 | case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines |
| 119 | case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines |
| 120 | case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines |
| 121 | case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines |
| 122 | case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines |
| 123 | case 0x49: l3 = 4096; break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or |
| 124 | case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines |
| 125 | case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines |
| 126 | case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines |
| 127 | case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines |
| 128 | case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines |
| 129 | case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines |
| 130 | case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored |
| 131 | case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored |
| 132 | case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored |
| 133 | case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored |
| 134 | case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines |
| 135 | case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64) |
| 136 | case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines |
| 137 | case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines |
| 138 | case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines |
| 139 | case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines |
| 140 | case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines |
| 141 | case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines |
| 142 | case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines |
| 143 | case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines |
| 144 | case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines |
| 145 | case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64) |
| 146 | case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64) |
| 147 | case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64) |
| 148 | case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64) |
| 149 | case 0x9B: l2 = 1024; break; // data L2 TLB, 4K...256M pages, fully, 96 entries (IA-64) |
| 150 | |
| 151 | default: break; |
| 152 | } |
| 153 | } |
| 154 | cout << endl; |
| 155 | cout << "tedious way l1 = " << l1 << endl; |
| 156 | cout << "tedious way l2 = " << l2 << endl; |
| 157 | cout << "tedious way l3 = " << l3 << endl; |
| 158 | } |
Gael Guennebaud | 931027f | 2010-06-26 23:15:06 +0200 | [diff] [blame] | 159 | |
| 160 | // dump everything |
| 161 | std::cout << endl <<"Raw dump:" << endl; |
| 162 | DUMP_CPUID(0x0); |
| 163 | DUMP_CPUID(0x1); |
| 164 | DUMP_CPUID(0x2); |
| 165 | DUMP_CPUID(0x3); |
| 166 | DUMP_CPUID(0x4); |
| 167 | DUMP_CPUID(0x5); |
| 168 | DUMP_CPUID(0x6); |
| 169 | DUMP_CPUID(0x80000000); |
| 170 | DUMP_CPUID(0x80000001); |
| 171 | DUMP_CPUID(0x80000002); |
| 172 | DUMP_CPUID(0x80000003); |
| 173 | DUMP_CPUID(0x80000004); |
| 174 | DUMP_CPUID(0x80000005); |
| 175 | DUMP_CPUID(0x80000006); |
| 176 | DUMP_CPUID(0x80000007); |
| 177 | DUMP_CPUID(0x80000008); |
| 178 | #else |
| 179 | cout << "EIGEN_CPUID is not defined" << endl; |
| 180 | #endif |
| 181 | return 0; |
| 182 | } |