From: "H.J. Lu" <hjl.to...@gmail.com>

>The above reads like it would be worth splitting branch_prediction_hints
>into branch_prediction_hints_taken and branch_prediction_hints_not_taken
>given not-taken is the default and thus will just increase code size?
>According to Intel® 64 and IA-32 Architectures Optimization Reference
>Manual[1], Branch Hint is updated for Redwood Cove.
Changed.

--------cut from [1]-------------------------
Starting with the Redwood Cove microarchitecture, if the predictor has
no stored information about a branch, the branch has the Intel® SSE2
branch taken hint (i.e., instruction prefix 3EH), When the codec
decodes the branch, it flips the branch’s prediction from not-taken to
taken. It then flushes the pipeline in front of it and steers this
pipeline to fetch the taken path of the branch.
--------cut end -----------------------------

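Split the tune branch_prediction_hints into branch_prediction_hints_taken
and branch_prediction_hints_not_taken.  When the corresponding tune is
enabled, always generate the branch hint for conditional branches (the
old 45%/55% probability threshold and the comparison against the default
static prediction are removed).  Both tunes are disabled by default.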

[1] 
https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/

        * config/i386/i386.cc (ix86_print_operand): Always generate
        branch hint for conditional branches.
        * config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split
        into ..
        (TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
        (TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
        * config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS):
        Split into ..
        (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
        (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
---
 gcc/config/i386/i386.cc      | 29 +++++++++--------------------
 gcc/config/i386/i386.h       |  6 ++++--
 gcc/config/i386/x86-tune.def | 13 +++++++++++--
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 1f71ed04be6..ea9cb620f8d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14041,7 +14041,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
 
            if (!optimize
                || optimize_function_for_size_p (cfun)
-               || !TARGET_BRANCH_PREDICTION_HINTS)
+               || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
+                   && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
              return;
 
            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
@@ -14050,25 +14051,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
                int pred_val = profile_probability::from_reg_br_prob_note
                                 (XINT (x, 0)).to_reg_br_prob_base ();
 
-               if (pred_val < REG_BR_PROB_BASE * 45 / 100
-                   || pred_val > REG_BR_PROB_BASE * 55 / 100)
-                 {
-                   bool taken = pred_val > REG_BR_PROB_BASE / 2;
-                   bool cputaken
-                     = final_forward_branch_p (current_output_insn) == 0;
-
-                   /* Emit hints only in the case default branch prediction
-                      heuristics would fail.  */
-                   if (taken != cputaken)
-                     {
-                       /* We use 3e (DS) prefix for taken branches and
-                          2e (CS) prefix for not taken branches.  */
-                       if (taken)
-                         fputs ("ds ; ", file);
-                       else
-                         fputs ("cs ; ", file);
-                     }
-                 }
+               bool taken = pred_val > REG_BR_PROB_BASE / 2;
+               /* We use 3e (DS) prefix for taken branches and
+                  2e (CS) prefix for not taken branches.  */
+               if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
+                 fputs ("ds ; ", file);
+               else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
+                 fputs ("cs ; ", file);
              }
            return;
          }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9ed225ec587..50ebed221dc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -309,8 +309,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_ZERO_EXTEND_WITH_AND \
        ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
 #define TARGET_UNROLL_STRLEN   ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
-#define TARGET_BRANCH_PREDICTION_HINTS \
-       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
+       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
+#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
+       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
 #define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
 #define TARGET_USE_SAHF                ix86_tune_features[X86_TUNE_USE_SAHF]
 #define TARGET_MOVX            ix86_tune_features[X86_TUNE_MOVX]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 343c32c291f..3d29bffc49c 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -683,15 +683,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
 DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
           m_K8)
 
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, starting with the Redwood Cove
+   microarchitecture, if the predictor has no stored information about a branch,
+   the branch has the Intel® SSE2 branch taken hint
+   (i.e., instruction prefix 3EH), When the codec decodes the branch, it flips
+   the branch’s prediction from not-taken to taken. It then flushes the pipeline
+   in front of it and steers this pipeline to fetch the taken path of the
+   branch.  */
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
+
 /*****************************************************************************/
 /* This never worked well before.                                            */
 /*****************************************************************************/
 
-/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
    on simulation result. But after P4 was made, no performance benefit
    was observed with branch hints.  It also increases the code size.
    As a result, icc never generates branch hints.  */
-DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
 
 /* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic.  */
 DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)
-- 
2.31.1

Reply via email to