cycle.h

Go to the documentation of this file.
00001 
00036 /* $Id: cycle.h,v 1.3 2006/10/15 19:44:35 rvuduc Exp $ */
00037 
00038 /* machine-dependent cycle counters code. Needs to be inlined. */
00039 
00040 /***************************************************************************/
00041 /* To use the cycle counters in your code, simply #include "cycle.h" (this
00042    file), and then use the functions/macros:
00043 
00044                  ticks getticks(void);
00045 
00046    ticks is an opaque typedef defined below, representing the current time.
00047    You extract the elapsed time between two calls to getticks() via:
00048 
00049                  double elapsed(ticks t1, ticks t0);
00050 
00051    which returns a double-precision variable in arbitrary units.  You
00052    are not expected to convert this into human units like seconds; it
00053    is intended only for *comparisons* of time intervals.
00054 
00055    (In order to use some of the OS-dependent timer routines like
00056    Solaris' gethrtime, you need to paste the autoconf snippet below
00057    into your configure.ac file and #include "config.h" before cycle.h,
00058    or define the relevant macros manually if you are not using autoconf.)
00059 */
00060 
00061 /***************************************************************************/
00062 /* This file uses macros like HAVE_GETHRTIME that are assumed to be
00063    defined according to whether the corresponding function/type/header
00064    is available on your system.  The necessary macros are most
00065    conveniently defined if you are using GNU autoconf, via the tests:
00066    
00067    dnl ---------------------------------------------------------------------
00068 
00069    AC_C_INLINE
00070    AC_HEADER_TIME
00071    AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h])
00072 
00073    AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
00074 #include <sys/time.h>
00075 #endif])
00076 
00077    AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime])
00078 
00079    dnl Cray UNICOS _rtc() (real-time clock) intrinsic
00080    AC_MSG_CHECKING([for _rtc intrinsic])
00081    rtc_ok=yes
00082    AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
00083 #include <intrinsics.h>
00084 #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
00085    AC_MSG_RESULT($rtc_ok)
00086 
00087    dnl ---------------------------------------------------------------------
00088 */
00089 
00090 /***************************************************************************/
00091 
00092 #if TIME_WITH_SYS_TIME
00093 # include <sys/time.h>
00094 # include <time.h>
00095 #else
00096 # if HAVE_SYS_TIME_H
00097 #  include <sys/time.h>
00098 # else
00099 #  include <time.h>
00100 # endif
00101 #endif
00102 /* INLINE_ELAPSED(INL): expands to a static function elapsed(t1, t0), declared with the inline keyword INL, that returns t1 - t0 converted to double. The ticks type must already be defined and must support subtraction. */
00103 #define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
00104 {                                     \
00105      return (double)(t1 - t0);                        \
00106 }
00107 
00108 /*----------------------------------------------------------------*/
00109 /* PAPI (Added by rich vuduc <richie@cs.berkeley.edu>, October 2004) */
00110 #if defined(HAVE_PAPI) && !defined(HAVE_TICK_COUNTER)
00111 /* PAPI backend: ticks holds the value returned by PAPI_get_real_cyc(). */
00113 #include <papi.h>
00114 
00115 typedef long_long ticks;
00116 
00117 #define getticks PAPI_get_real_cyc /* alias the function name, without (), so that getticks() expands to PAPI_get_real_cyc() */
00118 INLINE_ELAPSED(inline) /* provides elapsed(t1, t0) as (double)(t1 - t0), like the other integer-tick backends */
00119 #define HAVE_TICK_COUNTER
00120 #define TIMER_DESC "PAPI Cycle Counter"
00121 #endif
00122 
00123 /*----------------------------------------------------------------*/
00124 /* Solaris: use gethrtime() as the tick source when both the function and the hrtime_t type were detected */
00125 #if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
00126 typedef hrtime_t ticks;
00127 
00128 #define getticks gethrtime /* getticks() expands to gethrtime() */
00129 
00130 INLINE_ELAPSED(inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00131 
00132 #define HAVE_TICK_COUNTER /* the later sections test this macro before defining their own counter */
00133 #define TIMER_DESC "Solaris gethrtime()"
00134 #endif
00135 
00136 /*----------------------------------------------------------------*/
00137 /* AIX v. 4+ routines to read the real-time clock or time-base register */
00138 #if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
00139 typedef timebasestruct_t ticks;
00140 
00141 static inline ticks getticks(void) /* returns the raw structure filled in by read_real_time() */
00142 {
00143      ticks t;
00144      read_real_time(&t, TIMEBASE_SZ);
00145      return t;
00146 }
00147 
00148 static inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
00149 {
00150      time_base_to_time(&t1, TIMEBASE_SZ); /* t1 and t0 are by-value copies, so the caller's readings are not modified */
00151      time_base_to_time(&t0, TIMEBASE_SZ);
00152      return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 + ((double)t1.tb_low - (double)t0.tb_low)); /* convert each field to double before subtracting: the fields are unsigned, so an integer subtraction would wrap around whenever t1.tb_low < t0.tb_low */
00153 }
00154 
00155 #define HAVE_TICK_COUNTER
00156 #define TIMER_DESC "AIX read_real_time()"
00157 #endif
00158 
00159 /*----------------------------------------------------------------*/
00160 /*
00161  * PowerPC ``cycle'' counter using the time base register.
00162  */
00163 #if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))  && !defined(HAVE_TICK_COUNTER)
00164 typedef unsigned long long ticks;
00165 
00166 static __inline__ ticks getticks(void) /* returns one value built from an upper-word read (mftbu) and a lower-word read (mftb) */
00167 {
00168      unsigned int tbl, tbu0, tbu1;
00169 
00170      do { /* read upper, lower, upper again; retry if the upper word changed between the two reads */
00171       __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
00172       __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
00173       __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
00174      } while (tbu0 != tbu1);
00175 
00176      return (((unsigned long long)tbu0) << 32) | tbl; /* upper word shifted left by 32, OR-ed with the lower word */
00177 }
00178 
00179 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00180 
00181 #define HAVE_TICK_COUNTER
00182 #define TIMER_DESC "PowerPC time base register"
00183 #endif
00184 /*----------------------------------------------------------------*/
00185 /*
00186  * Pentium cycle counter 
00187  */
00188 #if (defined(__GNUC__) || defined(__ICC) || defined(__PGI)) && (defined(__i386__) || defined(__i386))  && !defined(HAVE_TICK_COUNTER)
00189 typedef unsigned long long ticks;
00190 
00191 static __inline__ ticks getticks(void) /* returns the value produced by the rdtsc instruction */
00192 {
00193      ticks ret;
00194 
00195      __asm__ __volatile__("rdtsc": "=A" (ret));
00196      /* no input, nothing else clobbered */
00197      return ret;
00198 }
00199 
00200 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00201 
00202 #define HAVE_TICK_COUNTER
00203 #define TIMER_DESC "Intel x86 RDTSC cycle counter (gcc and Intel compilers)"
00204 #endif
00205 
00206 /* Visual C++ -- thanks to Morten Nissov for his help with this */
00207 #if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
00208 #include <windows.h>
00209 typedef LARGE_INTEGER ticks;
00210 #define RDTSC __asm __emit 0fh __asm __emit 031h /* emits the two bytes 0fh, 031h via __emit instead of using a mnemonic (hack for VC++ 5.0) */
00211 
00212 static __inline ticks getticks(void) /* runs RDTSC, then stores EDX in HighPart and EAX in LowPart of the result */
00213 {
00214      ticks ret;
00215 
00216      __asm {
00217       RDTSC
00218       mov ret.HighPart, edx
00219       mov ret.LowPart, eax
00220      }
00221      return ret;
00222 }
00223 
00224 static __inline double elapsed(ticks t1, ticks t0) /* difference of the two QuadPart members, converted to double */
00225 {  
00226      return (double)(t1.QuadPart - t0.QuadPart);
00227 }  
00228 
00229 #define HAVE_TICK_COUNTER
00230 #define TIMER_DESC "Intel x86 RDTSC cycle counter (MSC/Win32 inline assembly)"
00231 #endif
00232 
00233 /*----------------------------------------------------------------*/
00234 /*
00235  * X86-64 cycle counter
00236  */
00237 #if defined(__GNUC__) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER)
00238 typedef unsigned long long ticks;
00239 
00240 static __inline__ ticks getticks(void) /* runs rdtsc and combines its two register outputs into one ticks value */
00241 {
00242      unsigned a, d; 
00243      __asm volatile("rdtsc" : "=a" (a), "=d" (d)); 
00244      return ((ticks)a) | (((ticks)d) << 32); /* d is shifted left by 32 and OR-ed with a */
00245 }
00246 
00247 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00248 
00249 #define HAVE_TICK_COUNTER
00250 #define TIMER_DESC "Intel x86-64 cycle counter"
00251 #endif
00252 
00253 /* Visual C++ targeting x86-64, detected through the compiler-defined _M_AMD64 / _M_X64 macros */
00254 #if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
00255 #include <intrin.h> /* declares the __rdtsc intrinsic */
00256 typedef unsigned __int64 ticks; /* return type of __rdtsc; spelled out so that no Windows header is needed */
00257 #define getticks __rdtsc /* getticks() expands to __rdtsc() */
00258 
00259 INLINE_ELAPSED(__inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00260 
00261 #define HAVE_TICK_COUNTER
00262 #define TIMER_DESC "Intel x86-64 cycle counter (MSC/Win32 inline assembly)"
00263 #endif
00264 
00265 /*----------------------------------------------------------------*/
00266 /*
00267  * IA64 cycle counter
00268  */
00269 /* intel's ecc compiler */
00270 #if defined(__ECC) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
00271 typedef unsigned long ticks;
00272 #include <ia64intrin.h>
00273 
00274 static __inline__ ticks getticks(void) /* returns the value of __getReg(_IA64_REG_AR_ITC) */
00275 {
00276      return __getReg(_IA64_REG_AR_ITC);
00277 }
00278  
00279 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00280  
00281 #define HAVE_TICK_COUNTER
00282 #define TIMER_DESC "Intel IA-64 cycle counter (Intel compiler)"
00283 #endif
00284 
00285 #if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
00286 typedef unsigned long ticks;
00287 
00288 static __inline__ ticks getticks(void) /* returns the value moved out of ar.itc by the inline asm */
00289 {
00290      ticks ret;
00291 
00292      __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
00293      return ret;
00294 }
00295 
00296 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00297 
00298 #define HAVE_TICK_COUNTER
00299 #define TIMER_DESC "Intel IA-64 cycle counter (gcc)"
00300 #endif
00301 
00302 /* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
00303 #if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
00304 #include <machine/sys/inline.h>
00305 typedef unsigned long ticks;
00306 
00307 static inline ticks getticks(void) /* returns the value of _Asm_mov_from_ar(_AREG_ITC) */
00308 {
00309      ticks ret;
00310 
00311      ret = _Asm_mov_from_ar (_AREG_ITC);
00312      return ret;
00313 }
00314 
00315 INLINE_ELAPSED(inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00316 
00317 #define HAVE_TICK_COUNTER
00318 #define TIMER_DESC "Intel IA-64 cycle counter (HP compiler)"
00319 #endif
00320 
00321 /*----------------------------------------------------------------*/
00322 /*
00323  * PA-RISC cycle counter 
00324  */
00325 #if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER) /* the || is parenthesized so that the HAVE_TICK_COUNTER test applies to both spellings of the architecture macro */
00326 typedef unsigned long ticks;
00327 
00328 #  ifdef __GNUC__
00329 static __inline__ ticks getticks(void) /* gcc variant: reads control register 16 with mfctl */
00330 {
00331      ticks ret;
00332 
00333      __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
00334      /* no input, nothing else clobbered */
00335      return ret;
00336 }
00337 
00338 #define TIMER_DESC "PA-RISC cycle counter (gcc)"
00339 
00340 #  else
00341 #  include <machine/inline.h>
00342 static inline unsigned long getticks(void) /* non-gcc variant: same register read through the _MFCTL macro */
00343 {
00344      register ticks ret;
00345      _MFCTL(16, ret);
00346      return ret;
00347 }
00348 
00349 #define TIMER_DESC "PA-RISC cycle counter (HP compiler?)"
00350 
00351 #  endif
00352 
00353 INLINE_ELAPSED(inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) for either variant */
00354 
00355 #define HAVE_TICK_COUNTER
00356 #endif
00357 
00358 /*----------------------------------------------------------------*/
00359 /* S390, courtesy of James Treacy */
00360 #if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
00361 typedef unsigned long long ticks;
00362 
00363 static __inline__ ticks getticks(void) /* returns the value that the stck instruction stores into the local variable */
00364 {
00365      ticks cycles;
00366      __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); /* the address of cycles is passed as an input; the write is covered by the "memory" clobber */
00367      return cycles;
00368 }
00369 
00370 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00371 
00372 #define HAVE_TICK_COUNTER
00373 #define TIMER_DESC "IBM S390 cycle counter"
00374 #endif
00375 /*----------------------------------------------------------------*/
00376 #if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
00377 /*
00378  * The 32-bit cycle counter on alpha overflows pretty quickly, 
00379  * unfortunately.  A 1GHz machine overflows in 4 seconds.
00380  */
00381 typedef unsigned int ticks;
00382 
00383 static __inline__ ticks getticks(void) /* returns the low 32 bits of the value read by rpcc */
00384 {
00385      unsigned long cc;
00386      __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
00387      return (cc & 0xFFFFFFFF); /* mask keeps only bits 31..0 of the register value */
00388 }
00389 
00390 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0); ticks is unsigned, so the subtraction is done modulo the width of unsigned int */
00391 
00392 #define HAVE_TICK_COUNTER
00393 #define TIMER_DESC "HP/Compaq/DEC Alpha cycle counter (32-bit)"
00394 #endif
00395 
00396 /*----------------------------------------------------------------*/
00397 #if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
00398 typedef unsigned long ticks;
00399 
00400 static __inline__ ticks getticks(void) /* returns the value read from the %tick register */
00401 {
00402      ticks ret;
00403      __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); /* volatile: the asm has an output and no inputs, so without it the compiler may merge or hoist repeated reads */
00404      return ret;
00405 }
00406 
00407 INLINE_ELAPSED(__inline__) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00408 
00409 #define HAVE_TICK_COUNTER
00410 #define TIMER_DESC "Sun SPARC v9 cycle counter"
00411 #endif
00412 
00413 /*----------------------------------------------------------------*/
00414 #if defined(__DECC) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
00415 #  include <c_asm.h>
00416 typedef unsigned int ticks;
00417 
00418 static __inline ticks getticks(void) /* returns the low 32 bits of the value produced by asm("rpcc %v0") */
00419 {
00420      unsigned long cc;
00421      cc = asm("rpcc %v0");
00422      return (cc & 0xFFFFFFFF); /* mask keeps only bits 31..0 */
00423 }
00424 
00425 INLINE_ELAPSED(__inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00426 
00427 #define HAVE_TICK_COUNTER
00428 #define TIMER_DESC "DEC Alpha cycle counter"
00429 #endif
00430 /*----------------------------------------------------------------*/
00431 /* SGI/Irix */
00432 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
00433 typedef struct timespec ticks;
00434 
00435 static inline ticks getticks(void) /* returns the timespec filled in by clock_gettime(CLOCK_SGI_CYCLE, ...); the call's return value is not checked */
00436 {
00437      struct timespec t;
00438      clock_gettime(CLOCK_SGI_CYCLE, &t);
00439      return t;
00440 }
00441 
00442 static inline double elapsed(ticks t1, ticks t0) /* returns (t1 - t0) as seconds * 1.0E9 plus the tv_nsec difference */
00443 {
00444      return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 +
00445       (double)(t1.tv_nsec - t0.tv_nsec);
00446 }
00447 #define HAVE_TICK_COUNTER
00448 #define TIMER_DESC "SGI/Irix cycle counter"
00449 #endif
00450 
00451 /*----------------------------------------------------------------*/
00452 /* Cray UNICOS _rtc() intrinsic function */
00453 #if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
00454 #ifdef HAVE_INTRINSICS_H
00455 #  include <intrinsics.h>
00456 #endif
00457 
00458 typedef long long ticks;
00459 
00460 #define getticks _rtc /* getticks() expands to _rtc() */
00461 
00462 INLINE_ELAPSED(inline) /* defines elapsed(t1, t0) as (double)(t1 - t0) */
00463 
00464 #define HAVE_TICK_COUNTER
00465 #define TIMER_DESC "Cray UNICOS _rtc() intrinsic read-counter routine"
00466 #endif
00467 
00468 /* eof */

Generated on Wed Sep 19 16:41:18 2007 for BeBOP Optimized Sparse Kernel Interface Library by  doxygen 1.4.6