blob: 3950f96e5416091026f1f1f81b80bb90af3c3c05 [file] [log] [blame]
Gael Guennebaud931027f2010-06-26 23:15:06 +02001
2#define EIGEN_INTERNAL_DEBUG_CACHE_QUERY
3#include <iostream>
4#include "../Eigen/Core"
5
6using namespace Eigen;
7using namespace std;
8
// DUMP_CPUID(CODE): runs EIGEN_CPUID for leaf CODE (sub-leaf 0) into a local,
// zero-initialised int[4] and prints the four resulting words to std::cout on
// one line, in hexadecimal.
//
// The words are printed as unsigned 32-bit values. Casting them to a pointer
// type to obtain hex output (as was done previously) sign-extends any word
// whose top bit is set on platforms where pointers are wider than int.
// The stream's formatting flags are saved and restored so that later output
// is not left in hex mode.
//
// Usage: DUMP_CPUID(0x2);   // trailing semicolon required
#define DUMP_CPUID(CODE) do { \
    int abcd[4]; \
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; \
    EIGEN_CPUID(abcd, CODE, 0); \
    const std::ios_base::fmtflags dump_cpuid_saved_flags = std::cout.flags(); \
    std::cout << "The code " << CODE << " gives " \
              << std::hex << std::showbase \
              << static_cast<unsigned int>(abcd[0]) << " " \
              << static_cast<unsigned int>(abcd[1]) << " " \
              << static_cast<unsigned int>(abcd[2]) << " " \
              << static_cast<unsigned int>(abcd[3]) << " " << std::endl; \
    std::cout.flags(dump_cpuid_saved_flags); \
  } while (0)
17
18int main()
19{
20 cout << "Eigen's L1 = " << ei_queryL1CacheSize() << endl;
21 cout << "Eigen's L2/L3 = " << ei_queryTopLevelCacheSize() << endl;
22 int l1, l2, l3;
23 ei_queryCacheSizes(l1, l2, l3);
24 cout << "Eigen's L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
25
26 #ifdef EIGEN_CPUID
27
28 ei_queryCacheSizes_intel(l1, l2, l3);
29 cout << "Eigen's intel L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
30 ei_queryCacheSizes_amd(l1, l2, l3);
31 cout << "Eigen's amd L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
32
33 int abcd[4];
34 int string[8];
35 char* string_char = (char*)(string);
36
37 // vendor ID
38 EIGEN_CPUID(abcd,0x0,0);
39 string[0] = abcd[1];
40 string[1] = abcd[3];
41 string[2] = abcd[2];
42 string[3] = 0;
Gael Guennebaud5e7bd962010-06-26 23:37:42 +020043 cout << endl;
Gael Guennebaud931027f2010-06-26 23:15:06 +020044 cout << "vendor id = " << string_char << endl;
Gael Guennebaud5e7bd962010-06-26 23:37:42 +020045 cout << endl;
Gael Guennebaud931027f2010-06-26 23:15:06 +020046
47 // dump Intel direct method
48 {
49 l1 = l2 = l3 = 0;
50 int cache_id = 0;
51 int cache_type = 0;
52 do {
53 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
54 EIGEN_CPUID(abcd,0x4,cache_id);
55 cache_type = (abcd[0] & 0x0F) >> 0;
56 int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
57 int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
58 int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
59 int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
60 int sets = (abcd[2]); // C[31:0]
61 int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
62
63 cout << "cache[" << cache_id << "].type = " << cache_type << "\n";
64 cout << "cache[" << cache_id << "].level = " << cache_level << "\n";
65 cout << "cache[" << cache_id << "].ways = " << ways << "\n";
66 cout << "cache[" << cache_id << "].partitions = " << partitions << "\n";
67 cout << "cache[" << cache_id << "].line_size = " << line_size << "\n";
68 cout << "cache[" << cache_id << "].sets = " << sets << "\n";
69 cout << "cache[" << cache_id << "].size = " << cache_size << "\n";
70
71 cache_id++;
72 } while(cache_type>0);
73 }
Gael Guennebaud5e7bd962010-06-26 23:37:42 +020074
75 // manual method for intel
76 {
77 l1 = l2 = l3 = 0;
78 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
79 EIGEN_CPUID(abcd,0x00000002,0);
80 unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
81 for(int i=0; i<14; ++i)
82 {
83 switch(bytes[i])
84 {
85 case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
86 case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
87 case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
88 case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
89 case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
90 case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
91 case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
92 // 56h L0 data TLB, 4M pages, 4 ways, 16 entries
93 // 57h L0 data TLB, 4K pages, 4 ways, 16 entries
94 // 59h L0 data TLB, 4K pages, fully, 16 entries
95 case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
96 case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
97 case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
98 case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
99 // 77h code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64)
100 // 96h data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64)
101
102
103 case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
104 case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
105 case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
106 case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
107 case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
108 case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
109 case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
110 case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
111 case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
112 case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
113 case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
114 case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
115 case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
116 case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
117 case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
118 case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
119 case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
120 case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
121 case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
122 case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
123 case 0x49: l3 = 4096; break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or
124 case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
125 case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
126 case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
127 case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
128 case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
129 case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
130 case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
131 case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
132 case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
133 case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
134 case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
135 case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
136 case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
137 case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
138 case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
139 case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
140 case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
141 case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
142 case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
143 case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
144 case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
145 case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
146 case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
147 case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
148 case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
149 case 0x9B: l2 = 1024; break; // data L2 TLB, 4K...256M pages, fully, 96 entries (IA-64)
150
151 default: break;
152 }
153 }
154 cout << endl;
155 cout << "tedious way l1 = " << l1 << endl;
156 cout << "tedious way l2 = " << l2 << endl;
157 cout << "tedious way l3 = " << l3 << endl;
158 }
Gael Guennebaud931027f2010-06-26 23:15:06 +0200159
160 // dump everything
161 std::cout << endl <<"Raw dump:" << endl;
162 DUMP_CPUID(0x0);
163 DUMP_CPUID(0x1);
164 DUMP_CPUID(0x2);
165 DUMP_CPUID(0x3);
166 DUMP_CPUID(0x4);
167 DUMP_CPUID(0x5);
168 DUMP_CPUID(0x6);
169 DUMP_CPUID(0x80000000);
170 DUMP_CPUID(0x80000001);
171 DUMP_CPUID(0x80000002);
172 DUMP_CPUID(0x80000003);
173 DUMP_CPUID(0x80000004);
174 DUMP_CPUID(0x80000005);
175 DUMP_CPUID(0x80000006);
176 DUMP_CPUID(0x80000007);
177 DUMP_CPUID(0x80000008);
178 #else
179 cout << "EIGEN_CPUID is not defined" << endl;
180 #endif
181 return 0;
182}