From: David Woodhouse <[email protected]>

Add a read_raw() callback to struct clocksource which returns the
derived clocksource value while also providing the underlying hardware
counter reading, and a raw_csid field which identifies that underlying
counter. This allows ktime_get_snapshot_id() to populate new raw_cycles
and raw_csid fields in struct system_time_snapshot.

For clocksources that are derived from an underlying counter (e.g. the
Hyper-V TSC page, which scales the TSC to 10MHz, or kvmclock, which
scales it to 1GHz), this provides atomic access to both the derived
value needed for timekeeping calculations and the raw hardware counter
needed by consumers like KVM's master clock and the vmclock PTP driver.

Signed-off-by: David Woodhouse <[email protected]>
Assisted-by: Kiro:claude-opus-4.6-1m
---
 include/linux/clocksource.h |  8 ++++++++
 include/linux/timekeeping.h |  6 ++++++
 kernel/time/timekeeping.c   | 30 +++++++++++++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7c38190b10bf..674299e32f0c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -37,6 +37,10 @@ struct module;
  *     This is the structure used for system time.
  *
  * @read:              Returns a cycle value, passes clocksource as argument
+ * @read_raw:          Optional. For a clocksource such as kvmclock or the
+ *                     Hyper-V scaled TSC which is calculated from an underlying
+ *                     hardware counter: returns the cycle value and stores the
+ *                     raw counter value it was calculated from in *raw
  * @mask:              Bitmask for two's complement
  *                     subtraction of non 64 bit counters
  * @mult:              Cycle to nanosecond multiplier
@@ -69,6 +73,8 @@ struct module;
  *                     in certain snapshot functions to allow callers to
  *                     validate the clocksource from which the snapshot was
  *                     taken.
+ * @raw_csid:          If a @read_raw method exists, the clocksource_id of the
+ *                     raw underlying counter
  * @flags:             Flags describing special properties
  * @base:              Hardware abstraction for clock on which a clocksource
  *                     is based
@@ -97,6 +103,7 @@ struct module;
  */
 struct clocksource {
        u64                     (*read)(struct clocksource *cs);
+       u64                     (*read_raw)(struct clocksource *cs, u64 *raw);
        u64                     mask;
        u32                     mult;
        u32                     shift;
@@ -109,6 +116,7 @@ struct clocksource {
        u32                     freq_khz;
        int                     rating;
        enum clocksource_ids    id;
+       enum clocksource_ids    raw_csid;
        enum vdso_clock_mode    vdso_clock_mode;
        unsigned long           flags;
        struct clocksource_base *base;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index f7945f1048fc..54799a9ebeb0 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -279,18 +279,24 @@ static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { ret
  * struct system_time_snapshot - Simultaneous time capture of CLOCK_MONOTONIC_RAW,
  *                              a selected CLOCK_* and the clocksource counter value
  * @cycles:            Clocksource counter value to produce the system times
+ * @raw_cycles:                For derived clocksources, the raw hardware counter value from
+ *                     which @cycles was derived
  * @sys:               The system time of the selected CLOCK ID
  * @raw:               Monotonic raw system time
  * @cs_id:             Clocksource ID
+ * @raw_csid:          Clocksource ID of underlying raw hardware counter, only
+ *                     meaningful if @raw_cycles is non-zero
  * @clock_was_set_seq: The sequence number of clock-was-set events
  * @cs_was_changed_seq:        The sequence number of clocksource change events
  * @valid:             True if the snapshot is valid
  */
 struct system_time_snapshot {
        u64                     cycles;
+       u64                     raw_cycles;
        ktime_t                 sys;
        ktime_t                 raw;
        enum clocksource_ids    cs_id;
+       enum clocksource_ids    raw_csid;
        unsigned int            clock_was_set_seq;
        u8                      cs_was_changed_seq;
        u8                      valid;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c4fd7229b7da..6c75a677fd2a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -304,6 +304,21 @@ static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
        return clock->read(clock);
 }
 
+static __always_inline u64 tk_clock_read_raw(const struct tk_read_base *tkr, u64 *raw)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       *raw = 0;       /* stays 0 unless ->read_raw() below writes it */
+
+       if (static_branch_likely(&clocksource_read_inlined))
+               return arch_inlined_clocksource_read(clock); /* *raw left at 0 */
+
+       if (clock->read_raw)    /* callback may be NULL, so check before calling */
+               return clock->read_raw(clock, raw);
+       else
+               return clock->read(clock);      /* plain read, *raw left at 0 */
+}
+
 static inline void clocksource_disable_inline_read(void)
 {
        static_branch_disable(&clocksource_read_inlined);
@@ -320,6 +335,18 @@ static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)
 
        return clock->read(clock);
 }
+
+static __always_inline u64 tk_clock_read_raw(const struct tk_read_base *tkr, u64 *raw)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       *raw = 0;       /* stays 0 unless ->read_raw() below writes it */
+
+       if (clock->read_raw)    /* callback may be NULL, so check before calling */
+               return clock->read_raw(clock, raw);
+       else
+               return clock->read(clock);      /* plain read, *raw left at 0 */
+}
 static inline void clocksource_disable_inline_read(void) { }
 static inline void clocksource_enable_inline_read(void) { }
 #endif
@@ -1243,8 +1270,9 @@ bool ktime_get_snapshot_id(struct system_time_snapshot *systime_snapshot, clocki
                if (!tk->clock_valid)
                        return false;
 
-               now = tk_clock_read(&tk->tkr_mono);
+               now = tk_clock_read_raw(&tk->tkr_mono, &systime_snapshot->raw_cycles);
                systime_snapshot->cs_id = tk->tkr_mono.clock->id;
+               systime_snapshot->raw_csid = tk->tkr_mono.clock->raw_csid;
                systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 
-- 
2.54.0


Reply via email to