ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
cpuid.cpp
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2011-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #include <Vc/cpuid.h>
21 #include <Vc/global.h>
22 
23 namespace ROOT {
24 namespace Vc
25 {
// Default prefetch size; CpuId::interpret overrides it for CPUID(2) descriptors 0xF0 / 0xF1.
43 CpuId::ushort CpuId::s_prefetch = 32; // The Intel Optimization Reference Manual says that if CPUID(2) doesn't set the prefetch size it is 32
// Set to true by CpuId::interpret when CPUID(2) reports descriptor 0x40.
49 bool CpuId::s_noL2orL3 = false;
50 
// MSVC build: use the compiler intrinsics from <intrin.h>. The Vc/ROOT namespaces are
// closed around the #include and reopened afterwards so the header is not pulled into them.
51 #ifdef VC_MSVC
52 } // better not include intrin.h inside the Vc namespace :)
53 } // namespace ROOT
54 #include <intrin.h>
55 namespace ROOT {
56 namespace Vc
57 {
// CPUID(leaf): calls __cpuid and copies the four result words into the variables
// eax, ebx, ecx and edx, which must be in scope where the macro is expanded.
58 #define CPUID(leaf) \
59  do { \
60  int out[4]; \
61  __cpuid(out, leaf); \
62  eax = out[0]; \
63  ebx = out[1]; \
64  ecx = out[2]; \
65  edx = out[3]; \
66  } while (false)
// CPUID_C(leaf, _ecx_): same as CPUID(leaf), but goes through __cpuidex so that _ecx_ is
// passed along as the additional (sub-leaf) argument.
67 #define CPUID_C(leaf, _ecx_) \
68  do { \
69  int out[4]; \
70  __cpuidex(out, leaf, _ecx_); \
71  eax = out[0]; \
72  ebx = out[1]; \
73  ecx = out[2]; \
74  edx = out[3]; \
75  } while (false)
76 #elif defined(__i386__) && defined(__PIC__)
77 // %ebx may be the PIC register.
// Executes the CPUID instruction for `leaf` on 32-bit PIC builds.
//
// %ebx may hold the PIC base register there, so it cannot be named as an asm output.
// The helper saves %ebx to a stack slot, runs CPUID, stores the EBX result straight to
// memory and then restores the original %ebx.
//
// leaf  value loaded into EAX before CPUID
// eax, ebx, edx  receive the corresponding result registers
// ecx   in/out: its current value is passed to CPUID in ECX and replaced by the ECX result
static inline void _Vc_cpuid(int leaf, unsigned int &eax, unsigned int &ebx, unsigned int &ecx, unsigned int &edx)
{
    int tmpb;
    asm("mov %%ebx, %[tmpb]\n\t"
        "cpuid\n\t"
        "mov %%ebx, %[ebx]\n\t"
        "mov %[tmpb], %%ebx\n\t"
        : [tmpb]"=m"(tmpb), "=a"(eax), [ebx] "=m"(ebx), "+c"(ecx), "=d"(edx)
        : [leaf] "a"(leaf)
        );
}
// CPUID(leaf): query `leaf` with ECX = 0; results land in the eax/ebx/ecx/edx variables
// that must be in scope at the expansion site.
// Wrapped in do { } while (false) so the two statements behave as one, e.g. under an
// unbraced `if`/`else`, exactly like the MSVC variant of this macro.
#define CPUID(leaf) \
    do { \
        ecx = 0; \
        _Vc_cpuid(leaf, eax, ebx, ecx, edx); \
    } while (false)
// CPUID_C(leaf, _ecx_): like CPUID(leaf) but passes _ecx_ in ECX (sub-leaf selection).
#define CPUID_C(leaf, _ecx_) \
    do { \
        ecx = _ecx_; \
        _Vc_cpuid(leaf, eax, ebx, ecx, edx); \
    } while (false)
95 #else
// Generic GCC-style build: issue CPUID directly through inline asm.
// CPUID(leaf): loads `leaf` into EAX and writes the results to the eax, ebx, ecx and edx
// variables in scope at the expansion site. ECX is not given an input value here.
96 #define CPUID(leaf) \
97  __asm__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(leaf))
// CPUID_C(leaf, _ecx_): same, but additionally loads _ecx_ into ECX before the instruction.
98 #define CPUID_C(leaf, _ecx_) \
99  __asm__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(leaf), "c"(_ecx_))
100 #endif
// Decodes the 4-bit associativity field that AMD reports for L2/L3 caches in
// CPUID Fn8000_0006 into a number of ways.
//
// bits  the raw 4-bit field value
// Returns the number of ways; 0 when the cache is disabled; 0xff for "fully associative";
// 0xffffffffu for encodings this table does not translate (0x7, 0x9 and anything > 0xF).
static unsigned int CpuIdAmdAssociativityTable(int bits)
{
    switch (bits) {
    case 0x0: return 0;     // cache disabled
    case 0x1: return 1;     // direct mapped
    case 0x2: return 2;
    case 0x3: return 3;     // listed in newer revisions of the AMD manual
    case 0x4: return 4;
    case 0x5: return 6;     // listed in newer revisions of the AMD manual
    case 0x6: return 8;
    case 0x8: return 16;
    case 0xA: return 32;
    case 0xB: return 48;
    case 0xC: return 64;
    case 0xD: return 96;
    case 0xE: return 128;
    case 0xF: return 0xff;  // fully associative
    }
    return 0xffffffffu;
}
119 
121 {
122  {
123  static bool done = false;
124  if (done) return;
125  done = true;
126  }
127  uint eax, ebx, ecx, edx;
128 
129  CPUID(0);
130  s_ecx0 = ecx;
131 
132  CPUID(1);
133  s_processorFeaturesC = ecx;
134  s_processorFeaturesD = edx;
135  s_processorModel = (eax & 0x000000f0) >> 4;
136  s_processorFamily = (eax & 0x00000f00) >> 8;
137  if (isAmd()) {
138  if (s_processorFamily >= 0xf) {
139  const uchar processorFamilyExt = (eax & 0x0ff00000) >> 20;
140  s_processorFamily += processorFamilyExt;
141  const uchar processorModelExt = (eax & 0x000f0000) >> 12;
142  s_processorModel += processorModelExt;
143  }
144  } else if (s_processorFamily == 0xf) {
145  const uchar processorFamilyExt = (eax & 0x0ff00000) >> 20;
146  s_processorFamily += processorFamilyExt;
147  const uchar processorModelExt = (eax & 0x000f0000) >> 12;
148  s_processorModel += processorModelExt;
149  } else if (s_processorFamily == 0x6) {
150  const uchar processorModelExt = (eax & 0x000f0000) >> 12;
151  s_processorModel += processorModelExt;
152  }
153  s_processorType = static_cast<ProcessorType>((eax & 0x00003000) >> 12);
154 
155  s_brandIndex = ebx & 0xff;
156  ebx >>= 8;
157  s_cacheLineSize = ebx & 0xff;
158  ebx >>= 8;
159  s_logicalProcessors = ebx & 0xff;
160 
161  CPUID(0x80000001);
162  s_processorFeatures8C = ecx;
163  s_processorFeatures8D = edx;
164 
165  if (isAmd()) {
167 
168  CPUID(0x80000005);
169  s_L1DataLineSize = ecx & 0xff;
170  s_L1Data = (ecx >> 24) * 1024;
171  s_L1Associativity = (ecx >> 16) & 0xff;
172  s_L1InstructionLineSize = edx & 0xff;
173  s_L1Instruction = (edx >> 24) * 1024;
174 
175  CPUID(0x80000006);
176  s_L2DataLineSize = ecx & 0xff;
177  s_L2Data = (ecx >> 16) * 1024;
178  s_L2Associativity = CpuIdAmdAssociativityTable((ecx >> 12) & 0xf);
179  s_L3DataLineSize = edx & 0xff;
180  s_L3Data = (edx >> 18) * 512 * 1024;
181  s_L3Associativity = CpuIdAmdAssociativityTable((ecx >> 12) & 0xf);
182  return;
183  }
184 
185  // Intel only
186  int repeat = 0;
187  bool checkLeaf4 = false;
188  do {
189  CPUID(2);
190  if (repeat == 0) {
191  repeat = eax & 0xff;
192  }
193  if (0 == (0x80000000u & eax)) {
194  for (int i = 0; i < 3; ++i) {
195  eax >>= 8;
196  interpret(eax & 0xff, &checkLeaf4);
197  }
198  }
199  if (0 == (0x80000000u & ebx)) {
200  for (int i = 0; i < 4; ++i) {
201  interpret(ebx & 0xff, &checkLeaf4);
202  ebx >>= 8;
203  }
204  }
205  if (0 == (0x80000000u & ecx)) {
206  for (int i = 0; i < 4; ++i) {
207  interpret(ecx & 0xff, &checkLeaf4);
208  ecx >>= 8;
209  }
210  }
211  if (0 == (0x80000000u & edx)) {
212  for (int i = 0; i < 4; ++i) {
213  interpret(edx & 0xff, &checkLeaf4);
214  edx >>= 8;
215  }
216  }
217  } while (--repeat > 0);
218  if (checkLeaf4) {
220  if (s_prefetch == 0) {
221  s_prefetch = 64;
222  }
223  eax = 1;
224  for (int i = 0; eax & 0x1f; ++i) {
225  CPUID_C(4, i);
226  const int cacheLevel = (eax >> 5) & 7;
227  //const int sharedBy = 1 + ((eax >> 14) & 0xfff);
228  const int linesize = 1 + (ebx & 0xfff); ebx >>= 12;
229  const int partitions = 1 + (ebx & 0x3ff); ebx >>= 10;
230  const int ways = 1 + (ebx & 0x3ff);
231  const int sets = 1 + ecx;
232  const int size = ways * partitions * linesize * sets;
233  switch (eax & 0x1f) {
234  case 1: // data cache
235  switch (cacheLevel) {
236  case 1:
237  s_L1Data = size;
238  s_L1DataLineSize = linesize;
239  s_L1Associativity = ways;
240  break;
241  case 2:
242  s_L2Data = size;
243  s_L2DataLineSize = linesize;
244  s_L2Associativity = ways;
245  break;
246  case 3:
247  s_L3Data = size;
248  s_L3DataLineSize = linesize;
249  s_L3Associativity = ways;
250  break;
251  }
252  break;
253  case 2: // instruction cache
254  switch (cacheLevel) {
255  case 1:
256  s_L1Instruction = size;
257  s_L1InstructionLineSize = linesize;
258  break;
259  }
260  break;
261  case 3: // unified cache
262  switch (cacheLevel) {
263  case 1:
264  s_L1Data = size;// / sharedBy;
265  s_L1DataLineSize = linesize;
266  s_L1Associativity = ways;
267  break;
268  case 2:
269  s_L2Data = size;// / sharedBy;
270  s_L2DataLineSize = linesize;
271  s_L2Associativity = ways;
272  break;
273  case 3:
274  s_L3Data = size;// / sharedBy;
275  s_L3DataLineSize = linesize;
276  s_L3Associativity = ways;
277  break;
278  }
279  break;
280  case 0: // no more caches
281  break;
282  default: // reserved
283  break;
284  }
285  }
286  }
287 }
288 
289 void CpuId::interpret(uchar byte, bool *checkLeaf4)
290 {
291  switch (byte) {
292  case 0x06:
293  s_L1Instruction = 8 * 1024;
295  s_L1Associativity = 4;
296  break;
297  case 0x08:
298  s_L1Instruction = 16 * 1024;
300  s_L1Associativity = 4;
301  break;
302  case 0x09:
303  s_L1Instruction = 32 * 1024;
305  s_L1Associativity = 4;
306  break;
307  case 0x0A:
308  s_L1Data = 8 * 1024;
309  s_L1DataLineSize = 32;
310  s_L1Associativity = 2;
311  break;
312  case 0x0C:
313  s_L1Data = 16 * 1024;
314  s_L1DataLineSize = 32;
315  s_L1Associativity = 4;
316  break;
317  case 0x0D:
318  s_L1Data = 16 * 1024;
319  s_L1DataLineSize = 64;
320  s_L1Associativity = 4;
321  break;
322  case 0x0E:
323  s_L1Data = 24 * 1024;
324  s_L1DataLineSize = 64;
325  s_L1Associativity = 6;
326  break;
327  case 0x21:
328  s_L2Data = 256 * 1024;
329  s_L2DataLineSize = 64;
330  s_L2Associativity = 8;
331  break;
332  case 0x22:
333  s_L3Data = 512 * 1024;
334  s_L3DataLineSize = 64;
335  s_L3Associativity = 4;
336  break;
337  case 0x23:
338  s_L3Data = 1024 * 1024;
339  s_L3DataLineSize = 64;
340  s_L3Associativity = 8;
341  break;
342  case 0x25:
343  s_L3Data = 2 * 1024 * 1024;
344  s_L3DataLineSize = 64;
345  s_L3Associativity = 8;
346  break;
347  case 0x29:
348  s_L3Data = 4 * 1024 * 1024;
349  s_L3DataLineSize = 64;
350  s_L3Associativity = 8;
351  break;
352  case 0x2C:
353  s_L1Data = 32 * 1024;
354  s_L1DataLineSize = 64;
355  s_L1Associativity = 8;
356  break;
357  case 0x30:
358  s_L1Data = 32 * 1024;
359  s_L1DataLineSize = 64;
360  s_L1Associativity = 8;
361  break;
362  case 0x40:
363  s_noL2orL3 = true;
364  break;
365  case 0x41:
366  s_L2Data = 128 * 1024;
367  s_L2DataLineSize = 32;
368  s_L2Associativity = 4;
369  break;
370  case 0x42:
371  s_L2Data = 256 * 1024;
372  s_L2DataLineSize = 32;
373  s_L2Associativity = 4;
374  break;
375  case 0x43:
376  s_L2Data = 512 * 1024;
377  s_L2DataLineSize = 32;
378  s_L2Associativity = 4;
379  break;
380  case 0x44:
381  s_L2Data = 1024 * 1024;
382  s_L2DataLineSize = 32;
383  s_L2Associativity = 4;
384  break;
385  case 0x45:
386  s_L2Data = 2 * 1024 * 1024;
387  s_L2DataLineSize = 32;
388  s_L2Associativity = 4;
389  break;
390  case 0x46:
391  s_L3Data = 4 * 1024 * 1024;
392  s_L3DataLineSize = 64;
393  s_L3Associativity = 4;
394  break;
395  case 0x47:
396  s_L3Data = 8 * 1024 * 1024;
397  s_L3DataLineSize = 64;
398  s_L3Associativity = 8;
399  break;
400  case 0x48:
401  s_L2Data = 3 * 1024 * 1024;
402  s_L2DataLineSize = 64;
403  s_L2Associativity = 12;
404  break;
405  case 0x49:
406  if (s_processorFamily == 0xf && s_processorModel == 0x6) {
407  s_L3Data = 4 * 1024 * 1024;
408  s_L3DataLineSize = 64;
409  s_L3Associativity = 16;
410  } else {
411  s_L2Data = 4 * 1024 * 1024;
412  s_L2DataLineSize = 64;
413  s_L2Associativity = 16;
414  }
415  break;
416  case 0x4A:
417  s_L3Data = 6 * 1024 * 1024;
418  s_L3DataLineSize = 64;
419  s_L3Associativity = 12;
420  break;
421  case 0x4B:
422  s_L3Data = 8 * 1024 * 1024;
423  s_L3DataLineSize = 64;
424  s_L3Associativity = 16;
425  break;
426  case 0x4C:
427  s_L3Data = 12 * 1024 * 1024;
428  s_L3DataLineSize = 64;
429  s_L3Associativity = 12;
430  break;
431  case 0x4D:
432  s_L3Data = 16 * 1024 * 1024;
433  s_L3DataLineSize = 64;
434  s_L3Associativity = 16;
435  break;
436  case 0x4E:
437  s_L2Data = 6 * 1024 * 1024;
438  s_L2DataLineSize = 64;
439  s_L2Associativity = 24;
440  break;
441  case 0x60:
442  s_L1Data = 16 * 1024;
443  s_L1DataLineSize = 64;
444  s_L1Associativity = 8;
445  break;
446  case 0x66:
447  s_L1Data = 8 * 1024;
448  s_L1DataLineSize = 64;
449  s_L1Associativity = 4;
450  break;
451  case 0x67:
452  s_L1Data = 16 * 1024;
453  s_L1DataLineSize = 64;
454  s_L1Associativity = 4;
455  break;
456  case 0x68:
457  s_L1Data = 32 * 1024;
458  s_L1DataLineSize = 64;
459  s_L1Associativity = 4;
460  break;
461  case 0x78:
462  s_L2Data = 1024 * 1024;
463  s_L2DataLineSize = 64;
464  s_L2Associativity = 4;
465  break;
466  case 0x79:
467  s_L2Data = 128 * 1024;
468  s_L2DataLineSize = 64;
469  s_L2Associativity = 8;
470  break;
471  case 0x7A:
472  s_L2Data = 256 * 1024;
473  s_L2DataLineSize = 64;
474  s_L2Associativity = 8;
475  break;
476  case 0x7B:
477  s_L2Data = 512 * 1024;
478  s_L2DataLineSize = 64;
479  s_L2Associativity = 8;
480  break;
481  case 0x7C:
482  s_L2Data = 1024 * 1024;
483  s_L2DataLineSize = 64;
484  s_L2Associativity = 8;
485  break;
486  case 0x7D:
487  s_L2Data = 2 * 1024 * 1024;
488  s_L2DataLineSize = 64;
489  s_L2Associativity = 8;
490  break;
491  case 0x7F:
492  s_L2Data = 512 * 1024;
493  s_L2DataLineSize = 64;
494  s_L2Associativity = 2;
495  break;
496  case 0x80:
497  s_L2Data = 512 * 1024;
498  s_L2DataLineSize = 64;
499  s_L2Associativity = 8;
500  break;
501  case 0x82:
502  s_L2Data = 256 * 1024;
503  s_L2DataLineSize = 32;
504  s_L2Associativity = 8;
505  break;
506  case 0x83:
507  s_L2Data = 512 * 1024;
508  s_L2DataLineSize = 32;
509  s_L2Associativity = 8;
510  break;
511  case 0x84:
512  s_L2Data = 1024 * 1024;
513  s_L2DataLineSize = 32;
514  s_L2Associativity = 8;
515  break;
516  case 0x85:
517  s_L2Data = 2 * 1024 * 1024;
518  s_L2DataLineSize = 32;
519  s_L2Associativity = 8;
520  break;
521  case 0x86:
522  s_L2Data = 512 * 1024;
523  s_L2DataLineSize = 64;
524  s_L2Associativity = 4;
525  break;
526  case 0x87:
527  s_L2Data = 1024 * 1024;
528  s_L2DataLineSize = 64;
529  s_L2Associativity = 8;
530  break;
531  case 0xD0:
532  s_L3Data = 512 * 1024;
533  s_L3DataLineSize = 64;
534  s_L3Associativity = 4;
535  break;
536  case 0xD1:
537  s_L3Data = 1024 * 1024;
538  s_L3DataLineSize = 64;
539  s_L3Associativity = 4;
540  break;
541  case 0xD2:
542  s_L3Data = 2 * 1024 * 1024;
543  s_L3DataLineSize = 64;
544  s_L3Associativity = 4;
545  break;
546  case 0xD6:
547  s_L3Data = 1024 * 1024;
548  s_L3DataLineSize = 64;
549  s_L3Associativity = 8;
550  break;
551  case 0xD7:
552  s_L3Data = 2 * 1024 * 1024;
553  s_L3DataLineSize = 64;
554  s_L3Associativity = 8;
555  break;
556  case 0xD8:
557  s_L3Data = 4 * 1024 * 1024;
558  s_L3DataLineSize = 64;
559  s_L3Associativity = 8;
560  break;
561  case 0xDC:
562  s_L3Data = 3 * 512 * 1024;
563  s_L3DataLineSize = 64;
564  s_L3Associativity = 12;
565  break;
566  case 0xDD:
567  s_L3Data = 3 * 1024 * 1024;
568  s_L3DataLineSize = 64;
569  s_L3Associativity = 12;
570  break;
571  case 0xDE:
572  s_L3Data = 6 * 1024 * 1024;
573  s_L3DataLineSize = 64;
574  s_L3Associativity = 12;
575  break;
576  case 0xE2:
577  s_L3Data = 2 * 1024 * 1024;
578  s_L3DataLineSize = 64;
579  s_L3Associativity = 16;
580  break;
581  case 0xE3:
582  s_L3Data = 4 * 1024 * 1024;
583  s_L3DataLineSize = 64;
584  s_L3Associativity = 16;
585  break;
586  case 0xE4:
587  s_L3Data = 8 * 1024 * 1024;
588  s_L3DataLineSize = 64;
589  s_L3Associativity = 16;
590  break;
591  case 0xEA:
592  s_L3Data = 12 * 1024 * 1024;
593  s_L3DataLineSize = 64;
594  s_L3Associativity = 24;
595  break;
596  case 0xEB:
597  s_L3Data = 18 * 1024 * 1024;
598  s_L3DataLineSize = 64;
599  s_L3Associativity = 24;
600  break;
601  case 0xEC:
602  s_L3Data = 24 * 1024 * 1024;
603  s_L3DataLineSize = 64;
604  s_L3Associativity = 24;
605  break;
606  case 0xF0:
607  s_prefetch = 64;
608  break;
609  case 0xF1:
610  s_prefetch = 128;
611  break;
612  case 0xFF:
613  // we have to use CPUID(4) to find out
614  *checkLeaf4 = true;
615  break;
616  default:
617  break;
618  }
619 }
620 } // namespace Vc
621 } // namespace ROOT
622 
623 // vim: sw=4 sts=4 et tw=100
static uint s_processorFeaturesD
Definition: cpuid.h:189
static uint s_logicalProcessors
Definition: cpuid.h:187
static uint s_L1Data
Definition: cpuid.h:193
static ushort s_L3DataLineSize
Definition: cpuid.h:199
static uint s_L3Data
Definition: cpuid.h:195
static bool s_noL2orL3
Definition: cpuid.h:209
static uint s_processorFeatures8C
Definition: cpuid.h:190
#define CPUID_C(leaf, _ecx_)
Definition: cpuid.cpp:98
static uchar s_cacheLineSize
Definition: cpuid.h:205
static uint s_L1Instruction
Definition: cpuid.h:192
static uint s_L1Associativity
Definition: cpuid.h:200
static ushort s_L1InstructionLineSize
Definition: cpuid.h:196
unsigned short ushort
Definition: cpuid.h:45
static uint s_L2Associativity
Definition: cpuid.h:201
static void init()
Reads the CPU capabilities and stores them for faster subsequent access.
Definition: cpuid.cpp:120
static void interpret(uchar byte, bool *checkLeaf4)
Definition: cpuid.cpp:289
static unsigned int CpuIdAmdAssociativityTable(int bits)
Definition: cpuid.cpp:101
static ushort s_prefetch
Definition: cpuid.h:203
static uint s_processorFeaturesC
Definition: cpuid.h:188
static uint s_ecx0
Definition: cpuid.h:186
static bool isAmd()
Return whether the CPU vendor is AMD.
Definition: cpuid.h:75
static uint s_L3Associativity
Definition: cpuid.h:202
unsigned char byte
Definition: gifdecode.c:10
unsigned int uint
Definition: cpuid.h:46
static uchar s_processorModel
Definition: cpuid.h:206
static uint s_processorFeatures8D
Definition: cpuid.h:191
static uint s_L2Data
Definition: cpuid.h:194
static ushort s_L2DataLineSize
Definition: cpuid.h:198
unsigned char uchar
Definition: cpuid.h:44
static ushort cacheLineSize()
Return the cache line size in bits.
Definition: cpuid.h:65
static ushort s_L1DataLineSize
Definition: cpuid.h:197
static ProcessorType s_processorType
Definition: cpuid.h:208
#define CPUID(leaf)
Definition: cpuid.cpp:96
static uchar s_processorFamily
Definition: cpuid.h:207
static uchar s_brandIndex
Definition: cpuid.h:204