diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index cc51597..b85bfc5 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -938,6 +938,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_NITERSM1 (res) = NULL;
   LOOP_VINFO_NITERS (res) = NULL;
   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
+  LOOP_VINFO_NITERS_VECT_LOOP (res) = NULL;
   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
@@ -6232,9 +6233,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
     {
       tree ratio_mult_vf;
       if (!ni_name)
-	ni_name = vect_build_loop_niters (loop_vinfo);
+	{
+	  ni_name = vect_build_loop_niters (loop_vinfo);
+	  LOOP_VINFO_NITERS (loop_vinfo) = ni_name;
+	}
       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
 				       &ratio);
+      LOOP_VINFO_NITERS_VECT_LOOP (loop_vinfo) = ratio_mult_vf;
       vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
 				      th, check_profitability);
     }
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 82fca0c..02e1359 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -52,6 +52,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-vectorizer.h"
 #include "cgraph.h"
 #include "builtins.h"
+#include "tree-ssa-address.h"
+#include "tree-ssa-loop-ivopts.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -8627,3 +8629,714 @@ supportable_narrowing_operation (enum tree_code code,
   interm_types->release ();
   return false;
 }
+
+/* Fix the trip count of the vectorized loop so that it also covers the
+   iterations of the loop remainder.  */
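+
+/* For example, assuming VF == 4 and the SSA_NAME case below (BND, T and
+   NITERS stand for the values handled here, not for actual identifiers),
+   the bound computation
+
+     bnd = niters >> 2;                i.e. floor (niters / VF)
+
+   is rewritten into
+
+     t = niters + 3;
+     bnd = t >> 2;                     i.e. ceil (niters / VF)
+
+   so the vectorized loop executes one extra iteration that processes
+   the remainder under the mask.  */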
+
+static void
+fix_vec_loop_trip_count (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree niters;
+  tree ratio_mult_vf = LOOP_VINFO_NITERS_VECT_LOOP (loop_vinfo);
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gimple *stmt;
+  gimple_stmt_iterator gsi;
+
+  niters = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	   ? LOOP_VINFO_NITERS (loop_vinfo)
+	   : LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
+
+  if (TREE_CODE (ratio_mult_vf) == SSA_NAME)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (ratio_mult_vf);
+      tree bnd, lhs, tmp, log_vf;
+      gimple *def_bnd;
+      gimple *new_def_bnd;
+      gcc_assert (gimple_code (def) == GIMPLE_ASSIGN);
+      gcc_assert (gimple_assign_rhs_code (def) == LSHIFT_EXPR);
+      bnd = gimple_assign_rhs1 (def);
+      gcc_assert (TREE_CODE (bnd) == SSA_NAME);
+      gcc_assert (TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST);
+      def_bnd = SSA_NAME_DEF_STMT (bnd);
+      gsi = gsi_for_stmt (def_bnd);
+      /* Create the statement t = niters + (vf - 1).  */
+      lhs = create_tmp_var (TREE_TYPE (bnd));
+      stmt = gimple_build_assign (lhs, PLUS_EXPR, niters,
+				  build_int_cst (TREE_TYPE (bnd), vf - 1));
+      tmp = make_ssa_name (lhs, stmt);
+      gimple_assign_set_lhs (stmt, tmp);
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      /* Replace BND definition with bnd = t >> log2 (vf).  */
+      log_vf = build_int_cst (TREE_TYPE (tmp), exact_log2 (vf));
+      new_def_bnd = gimple_build_assign (bnd, RSHIFT_EXPR, tmp, log_vf);
+      gsi_replace (&gsi, new_def_bnd, false);
+    }
+  else
+    {
+      tree op_const;
+      unsigned n;
+      unsigned logvf = exact_log2 (vf);
+      gcond *cond;
+      gcc_assert (TREE_CODE (ratio_mult_vf) == INTEGER_CST);
+      gcc_assert (TREE_CODE (niters) == INTEGER_CST);
+      /* Change value of bnd in GIMPLE_COND.  */
+      gcc_assert (loop->num_nodes == 2);
+      stmt = last_stmt (loop->header);
+      gcc_assert (gimple_code (stmt) == GIMPLE_COND);
+      n = tree_to_uhwi (niters);
+      n = ((n + (vf - 1)) >> logvf) << logvf;
+      op_const = build_int_cst (TREE_TYPE (gimple_cond_lhs (stmt)), n);
+      gcc_assert (TREE_CODE (gimple_cond_rhs (stmt)) == INTEGER_CST);
+      cond = dyn_cast <gcond *> (stmt);
+      gimple_cond_set_rhs (cond, op_const);
+    }
+}
+
+/* Make the scalar remainder unreachable from the vectorized loop.  */
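+
+/* A sketch of the effect (the condition COND and the block names are
+   made up): the guard
+
+     if (cond) goto exit_bb; else goto scalar_preheader;
+
+   found in the first block after the loop exit is turned into
+
+     if (1) goto exit_bb; else goto scalar_preheader;
+
+   so the scalar remainder loop becomes dead.  */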
+
+static void
+isolate_remainder (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  edge e;
+  basic_block bb = loop->header;
+  gimple *last;
+  gcond *cond;
+
+  e = EDGE_SUCC (bb, 0);
+  if (flow_bb_inside_loop_p (loop, e->dest))
+    e = EDGE_SUCC (bb, 1);
+  bb = e->dest;
+  gcc_assert (!flow_bb_inside_loop_p (loop, bb));
+  last = last_stmt (bb);
+  gcc_assert (gimple_code (last) == GIMPLE_COND);
+  cond = as_a <gcond *> (last);
+  /* Assume that target of false edge is scalar loop preheader.  */
+  gimple_cond_make_true (cond);
+}
+
+/* Generate the induction vector that will be used for mask evaluation.  */
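+
+/* For example, for VF == 4 and a 32-bit index type the generated GIMPLE
+   looks roughly like (SSA names are made up):
+
+     preheader:
+       cst_1 = { 0, 1, 2, 3 };
+       cst_2 = { 4, 4, 4, 4 };
+     loop header:
+       vec_iv_3 = PHI <cst_1 (preheader), vec_iv_4 (latch)>;
+       vec_iv_4 = vec_iv_3 + cst_2;
+
+   The PHI result is returned and later compared against the iteration
+   count to form the mask.  */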
+
+static tree
+gen_vec_induction (loop_vec_info loop_vinfo, unsigned elem_size, unsigned size)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  edge pe = loop_preheader_edge (loop);
+  vec<constructor_elt, va_gc> *v;
+  gimple *stmt;
+  gimple_stmt_iterator gsi;
+  gphi *induction_phi;
+  tree iv_type, vectype;
+  tree lhs, rhs, iv;
+  unsigned n;
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  int i;
+  tree new_vec, new_var;
+  tree vec_init, vec_step, vec_dest, vec_def;
+  tree val;
+  tree induc_def;
+  basic_block new_bb;
+  machine_mode mode;
+
+  /* Find control iv.  */
+  stmt = last_stmt (loop->header);
+  gcc_assert (gimple_code (stmt) == GIMPLE_COND);
+  lhs = gimple_cond_lhs (stmt);
+  rhs = gimple_cond_rhs (stmt);
+  /* Assume any operand order.  */
+  if (TREE_CODE (lhs) != SSA_NAME)
+    iv = rhs;
+  else
+    {
+      gimple *def_stmt = SSA_NAME_DEF_STMT (lhs);
+      if (gimple_bb (def_stmt) != loop->header)
+	iv = rhs;
+      else
+	iv = lhs;
+    }
+  gcc_assert (TREE_CODE (iv) == SSA_NAME);
+  /* Determine type to build vector index aka induction vector.  */
+  n = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (iv)));
+  if (n > elem_size)
+    /* Multiple types are not yet supported.  */
+    return NULL_TREE;
+  if (n == elem_size && !TYPE_UNSIGNED (TREE_TYPE (iv)))
+    iv_type = TREE_TYPE (iv);
+  else
+    iv_type = build_nonstandard_integer_type (elem_size, 0);
+  vectype = get_vectype_for_scalar_type_and_size (iv_type, size);
+  mode = TYPE_MODE (vectype);
+  /* Check that vector comparison for IV_TYPE is supported.  */
+  if (get_vcond_icode (mode, mode, 0) == CODE_FOR_nothing)
+    {
+      if (dump_enabled_p ())
+	{
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "type is not supported for vector compare!\n");
+	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
+	}
+      return NULL_TREE;
+    }
+
+  /* Build induction initialization and insert it to loop preheader.  */
+  vec_alloc (v, vf);
+  for (i = 0; i < vf; i++)
+    {
+      tree elem;
+      elem = build_int_cst (iv_type, i);
+      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elem);
+    }
+  new_vec = build_vector_from_ctor (vectype, v);
+  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
+  stmt = gimple_build_assign (new_var, new_vec);
+  vec_init = make_ssa_name (new_var, stmt);
+  gimple_assign_set_lhs (stmt, vec_init);
+  new_bb = gsi_insert_on_edge_immediate (pe, stmt);
+  gcc_assert (!new_bb);
+
+  /* Create the vector step, each element of which is VF.  */
+  val = build_int_cst (iv_type, vf);
+  new_vec = build_vector_from_val (vectype, val);
+  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
+  stmt = gimple_build_assign (new_var, new_vec);
+  vec_step = make_ssa_name (new_var, stmt);
+  gimple_assign_set_lhs (stmt, vec_step);
+  new_bb = gsi_insert_on_edge_immediate (pe, stmt);
+  gcc_assert (!new_bb);
+
+  /* Create the induction-phi.  */
+  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
+  induction_phi = create_phi_node (vec_dest, loop->header);
+  induc_def = PHI_RESULT (induction_phi);
+
+  /* Create vector iv increment inside loop.  */
+  gsi = gsi_after_labels (loop->header);
+  stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
+  vec_def = make_ssa_name (vec_dest, stmt);
+  gimple_assign_set_lhs (stmt, vec_def);
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+  /* Set the arguments of phi node.  */
+  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
+  add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop),
+	       UNKNOWN_LOCATION);
+  return induc_def;
+}
+
+/* Produce the mask that will be used for masking loads, stores and
+   reductions.  */
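+
+/* For example, with VF == 4 and iteration count N the mask is computed
+   roughly as (SSA names are made up):
+
+     preheader:
+       cst_5 = { n, n, n, n };
+     loop header:
+       vec_mask_6 = vec_iv_3 < cst_5;
+
+   so lanes whose index is still below the iteration count remain active
+   and the remainder lanes are masked out.  */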
+
+static tree
+gen_mask_for_remainder (loop_vec_info loop_vinfo, tree vec_index, unsigned size)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree new_vec, new_var;
+  tree niters, vec_niters, new_niters, vec_res, vec_mask;
+  gimple *stmt;
+  basic_block new_bb;
+  edge pe = loop_preheader_edge (loop);
+  gimple_stmt_iterator gsi;
+  tree vectype = TREE_TYPE (vec_index);
+  tree s_vectype;
+
+  gsi = gsi_after_labels (loop->header);
+  niters = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	   ? LOOP_VINFO_NITERS (loop_vinfo)
+	   : LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
+
+  /* Create the comparison vector, each element of which is NITERS.  */
+  if (!types_compatible_p (TREE_TYPE (niters), TREE_TYPE (vectype)))
+    {
+      tree new_type = TREE_TYPE (vectype);
+      enum tree_code cop;
+      cop = (tree_to_uhwi (TYPE_SIZE (new_type))
+	     == tree_to_uhwi (TYPE_SIZE (TREE_TYPE (niters))))
+	    ? NOP_EXPR : CONVERT_EXPR;
+      new_niters = make_ssa_name (new_type);
+      stmt = gimple_build_assign (new_niters, cop, niters);
+      new_bb = gsi_insert_on_edge_immediate (pe, stmt);
+      gcc_assert (!new_bb);
+    }
+  else
+    new_niters = niters;
+  new_vec = build_vector_from_val (vectype, new_niters);
+  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
+  stmt = gimple_build_assign (new_var, new_vec);
+  vec_niters = make_ssa_name (new_var, stmt);
+  gimple_assign_set_lhs (stmt, vec_niters);
+  new_bb = gsi_insert_on_edge_immediate (pe, stmt);
+  gcc_assert (!new_bb);
+  /* Create vector comparison the result of which will be used as mask
+     for loads/stores.  */
+  if (TYPE_UNSIGNED (vectype))
+    {
+      /* Create signed vectype.  */
+      tree stype = TREE_TYPE (vectype);
+      unsigned sz = tree_to_uhwi (TYPE_SIZE (stype));
+      tree new_type = build_nonstandard_integer_type (sz, 0);
+      s_vectype = get_vectype_for_scalar_type_and_size (new_type, size);
+      gcc_assert (s_vectype);
+    }
+  else
+    s_vectype = vectype;
+  vec_mask = vect_get_new_vect_var (s_vectype, vect_simple_var, "vec_mask_");
+  stmt = gimple_build_assign (vec_mask, LT_EXPR, vec_index, vec_niters);
+  vec_res = make_ssa_name (vec_mask, stmt);
+  gimple_assign_set_lhs (stmt, vec_res);
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+  return vec_res;
+}
+
+/* Convert each load to a masked load.  */
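+
+/* For example, a vectorized load (a sketch; SSA names are made up)
+
+     vect_7 = MEM[(int *) ptr_8];
+
+   is replaced by the internal-function call
+
+     vect_7 = MASK_LOAD (addr_9, 0B, vec_mask_6);
+
+   where the mask selects the active lanes.  */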
+
+static void
+convert_loads_to_masked (vec<gimple *> *loads, tree mask)
+{
+  gimple *stmt, *new_stmt;
+  tree addr, ref;
+  gimple_stmt_iterator gsi;
+
+  while (loads->length () > 0)
+    {
+      tree lhs, ptr;
+      stmt = loads->pop ();
+      gsi = gsi_for_stmt (stmt);
+      lhs = gimple_assign_lhs (stmt);
+      ref = gimple_assign_rhs1 (stmt);
+      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
+				       true, NULL_TREE, true,
+				       GSI_SAME_STMT);
+      ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
+      if (!SSA_NAME_PTR_INFO (addr))
+	copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr), ref);
+      new_stmt = gimple_build_call_internal (IFN_MASK_LOAD, 3,
+					     addr, ptr, mask);
+      gimple_call_set_lhs (new_stmt, lhs);
+      gsi_replace (&gsi, new_stmt, false);
+    }
+}
+
+/* Convert each store to a masked one.  */
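+
+/* For example, a vectorized store (a sketch; SSA names are made up)
+
+     MEM[(int *) ptr_8] = vect_7;
+
+   is replaced by
+
+     MASK_STORE (addr_9, 0B, vec_mask_6, vect_7);  */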
+
+static void
+convert_stores_to_masked (vec<gimple *> *stores, tree mask)
+{
+  gimple *stmt, *new_stmt;
+  tree addr, ref;
+  gimple_stmt_iterator gsi;
+
+  while (stores->length () > 0)
+    {
+      tree rhs, ptr;
+      stmt = stores->pop ();
+      gsi = gsi_for_stmt (stmt);
+      ref = gimple_assign_lhs (stmt);
+      rhs = gimple_assign_rhs1 (stmt);
+      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
+				       true, NULL_TREE, true,
+				       GSI_SAME_STMT);
+      ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
+      if (!SSA_NAME_PTR_INFO (addr))
+	copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr), ref);
+      new_stmt = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
+					     mask, rhs);
+      gsi_replace (&gsi, new_stmt, false);
+    }
+}
+
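+/* AND the remainder MASK into the mask argument of each already-masked
+   load/store in MASKED_STMT, applying a VIEW_CONVERT_EXPR first if the
+   mask vector types differ.  A sketch (SSA names are made up):
+
+     MASK_STORE (addr_9, 0B, old_mask_10, vect_7);
+
+   becomes
+
+     new_mask_11 = vec_mask_6 & old_mask_10;
+     MASK_STORE (addr_9, 0B, new_mask_11, vect_7);  */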
+static void
+fix_mask_for_masked_ld_st (vec<gimple *> *masked_stmt, tree mask)
+{
+  gimple *stmt, *new_stmt;
+  tree old, lhs, vectype, var, n_lhs;
+  gimple_stmt_iterator gsi;
+
+  while (masked_stmt->length () > 0)
+    {
+      stmt = masked_stmt->pop ();
+      gsi = gsi_for_stmt (stmt);
+      old = gimple_call_arg (stmt, 2);
+      vectype = TREE_TYPE (old);
+      if (TREE_TYPE (mask) != vectype)
+	{
+	  tree new_vtype = TREE_TYPE (mask);
+	  tree n_var;
+	  tree conv_expr;
+	  n_var = vect_get_new_vect_var (new_vtype, vect_simple_var, NULL);
+	  conv_expr = build1 (VIEW_CONVERT_EXPR, new_vtype, old);
+	  new_stmt = gimple_build_assign (n_var, conv_expr);
+	  n_lhs = make_ssa_name (n_var);
+	  gimple_assign_set_lhs (new_stmt, n_lhs);
+	  vectype = new_vtype;
+	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+	}
+      else
+	n_lhs = old;
+      var = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
+      new_stmt = gimple_build_assign (var, BIT_AND_EXPR, mask, n_lhs);
+      lhs = make_ssa_name (var, new_stmt);
+      gimple_assign_set_lhs (new_stmt, lhs);
+      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+      gimple_call_set_arg (stmt, 2, lhs);
+      update_stmt (stmt);
+    }
+}
+
+/* Convert vectorized reductions to VEC_COND statements to preserve
+   reduction semantics:
+	s1 = x + s2 --> t = x + s2; s1 = (mask) ? t : s2.  */
+
+static void
+convert_reductions (loop_vec_info loop_vinfo, tree mask)
+{
+  unsigned i;
+  for (i = 0; i < LOOP_VINFO_REDUCTIONS (loop_vinfo).length (); i++)
+    {
+      gimple *stmt = LOOP_VINFO_REDUCTIONS (loop_vinfo)[i];
+      gimple_stmt_iterator gsi;
+      tree vectype;
+      tree lhs, rhs;
+      tree var, new_lhs, vec_cond_expr;
+      gimple *new_stmt, *def;
+      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+      stmt = STMT_VINFO_VEC_STMT (stmt_info);
+      lhs = gimple_assign_lhs (stmt);
+      vectype = TREE_TYPE (lhs);
+      gsi = gsi_for_stmt (stmt);
+      rhs = gimple_assign_rhs1 (stmt);
+      gcc_assert (TREE_CODE (rhs) == SSA_NAME);
+      def = SSA_NAME_DEF_STMT (rhs);
+      if (gimple_code (def) != GIMPLE_PHI)
+	{
+	  rhs = gimple_assign_rhs2 (stmt);
+	  gcc_assert (TREE_CODE (rhs) == SSA_NAME);
+	  def = SSA_NAME_DEF_STMT (rhs);
+	  gcc_assert (gimple_code (def) == GIMPLE_PHI);
+	}
+      /* Change lhs of STMT.  */
+      var = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
+      new_lhs = make_ssa_name (var, stmt);
+      gimple_assign_set_lhs (stmt, new_lhs);
+      /* Generate new VEC_COND expr.  */
+      vec_cond_expr = build3 (VEC_COND_EXPR, vectype, mask, new_lhs, rhs);
+      new_stmt = gimple_build_assign (lhs, vec_cond_expr);
+      gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
+    }
+}
+
+/* Return true if LHS is a MEM_REF whose base pointer is incremented by
+   the vector size, and false otherwise.  */
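+
+/* For example, for a V4SI store (16 bytes) the accepted pattern looks
+   roughly like (SSA names are made up):
+
+     loop header:
+       ptr_11 = PHI <ptr_init_12 (preheader), ptr_13 (latch)>;
+       MEM[ptr_11] = vect_7;
+       ...
+       ptr_13 = ptr_11 + 16;  */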
+
+static bool
+mem_ref_is_vec_size_incremented (loop_vec_info loop_vinfo, tree lhs)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree vectype = TREE_TYPE (lhs);
+  unsigned n = GET_MODE_SIZE (TYPE_MODE (vectype));
+  gphi *phi;
+  edge e = loop_latch_edge (loop);
+  tree arg;
+  gimple *def;
+  tree name;
+  if (TREE_CODE (lhs) != MEM_REF)
+    return false;
+  name = TREE_OPERAND (lhs, 0);
+  if (TREE_CODE (name) != SSA_NAME)
+    return false;
+  def = SSA_NAME_DEF_STMT (name);
+  if (!def || gimple_code (def) != GIMPLE_PHI)
+    return false;
+  phi = as_a <gphi *> (def);
+  arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
+  gcc_assert (TREE_CODE (arg) == SSA_NAME);
+  def = SSA_NAME_DEF_STMT (arg);
+  if (gimple_code (def) != GIMPLE_ASSIGN
+      || gimple_assign_rhs_code (def) != POINTER_PLUS_EXPR)
+    return false;
+  arg = gimple_assign_rhs2 (def);
+  if (TREE_CODE (arg) != INTEGER_CST)
+    arg = gimple_assign_rhs1 (def);
+  if (TREE_CODE (arg) != INTEGER_CST)
+    return false;
+  if (compare_tree_int (arg, n) != 0)
+    return false;
+  return true;
+}
+
+/* Combine the vectorized loop with the scalar remainder by masking
+   statements such as memory reads/writes and reductions so that a legal
+   result is produced.  A new vector induction variable is created, and
+   the mask is simply the result of comparing it with a vector holding
+   the number of iterations.  The loop trip count is adjusted, and the
+   scalar loop corresponding to the remainder is made unreachable from
+   the vectorized loop.  */
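+
+/* For example, a loop like
+
+     for (i = 0; i < n; i++)
+       a[i] += b[i];
+
+   vectorized with VF == 4 ends up roughly as (a sketch mixing C and
+   GIMPLE; names are made up):
+
+     vec_iv = { 0, 1, 2, 3 };
+     vec_n = { n, n, n, n };
+     for (k = 0; k < round_up (n, 4); k += 4)
+       {
+         mask = vec_iv < vec_n;
+         vb = MASK_LOAD (&b[k], 0B, mask);
+         va = MASK_LOAD (&a[k], 0B, mask);
+         MASK_STORE (&a[k], 0B, mask, va + vb);
+         vec_iv = vec_iv + { 4, 4, 4, 4 };
+       }
+
+   and the scalar remainder loop is made unreachable.  */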
+
+void
+combine_vect_loop_remainder (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  auto_vec<gimple *, 10> loads;
+  auto_vec<gimple *, 5> stores;
+  auto_vec<gimple *, 5> masked_ld_st;
+  int elem_size = 0;
+  int n;
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+  gimple *stmt;
+  stmt_vec_info stmt_info;
+  tree lhs, rhs, vectype;
+  tree vec_index, vec_mask;
+  bool has_reductions = false;
+  unsigned size = 0;
+
+  if (!loop)
+    return;
+  if (loop->inner)
+    /* Outer-loop vectorization is not supported.  */
+    return;
+  gcc_assert (LOOP_VINFO_VECTORIZABLE_P (loop_vinfo));
+  vect_location = find_loop_location (loop);
+  if (!LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
+      || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    return;
+  if (!LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()
+      || !LOOP_VINFO_GROUPED_STORES (loop_vinfo).is_empty ())
+    return;
+  bb = loop->header;
+  /* Collect all loads and stores.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      stmt_info = vinfo_for_stmt (stmt);
+      if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	/* Not supported yet!  */
+	return;
+      /* Check that we support the given def type.  */
+      if (stmt_info)
+	switch (STMT_VINFO_DEF_TYPE (stmt_info))
+	  {
+	    case vect_induction_def:
+	      if (STMT_VINFO_LIVE_P (stmt_info))
+		return;
+	      break;
+	    case vect_nested_cycle:
+	    case vect_double_reduction_def:
+	    case vect_external_def:
+	      return;
+	    default:
+	      break;
+	  }
+
+      if (gimple_assign_load_p (stmt))
+	{
+	  lhs = gimple_assign_lhs (stmt);
+	  rhs = gimple_assign_rhs1 (stmt);
+	  vectype = TREE_TYPE (lhs);
+	  if (may_be_nonaddressable_p (rhs))
+	    return;
+	  if (!VECTOR_TYPE_P (vectype))
+	    {
+	      struct data_reference *dr;
+	      if (!stmt_info)
+		continue;
+	      dr = STMT_VINFO_DATA_REF (stmt_info);
+	      if (!dr)
+		continue;
+	      if (TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
+		return;
+	      if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) <= 0)
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_NOTE, vect_location,
+				 "Load with decrement is not masked.\n");
+		  return;
+		}
+	      continue;
+	    }
+	  if (vf / TYPE_VECTOR_SUBPARTS (vectype) > 1)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "multiple-types are not supported yet.\n");
+	      return;
+	    }
+	  n = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
+	  if (elem_size == 0)
+	    elem_size = n;
+	  else if (n != elem_size)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "multiple-types are not supported yet.\n");
+	      return;
+	    }
+	  if (size == 0)
+	    size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
+	  if (!can_vec_mask_load_store_p (TYPE_MODE (vectype), true))
+	    {
+	      if (dump_enabled_p ())
+		{
+		  dump_printf_loc (MSG_NOTE, vect_location,
+				   "type is not supported for masking!\n");
+		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
+		}
+	      return;
+	    }
+	  loads.safe_push (stmt);
+	}
+      else if (gimple_store_p (stmt))
+	{
+	  gcc_assert (gimple_assign_single_p (stmt));
+	  lhs = gimple_assign_lhs (stmt);
+	  if (may_be_nonaddressable_p (lhs))
+	    return;
+	  vectype = TREE_TYPE (lhs);
+	  if (!VECTOR_TYPE_P (vectype))
+	    continue;
+	  if (vf / TYPE_VECTOR_SUBPARTS (vectype) > 1)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "multiple-types are not supported yet.\n");
+	      return;
+	    }
+	  n = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
+	  if (elem_size == 0)
+	    elem_size = n;
+	  else if (n != elem_size)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "multiple-types are not supported yet.\n");
+	      return;
+	    }
+	  if (!mem_ref_is_vec_size_incremented (loop_vinfo, lhs))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "Store with decrement is not masked.\n");
+	      return;
+	    }
+	  if (size == 0)
+	    size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
+	  if (!can_vec_mask_load_store_p (TYPE_MODE (vectype), false))
+	    {
+	      if (dump_enabled_p ())
+		{
+		  dump_printf_loc (MSG_NOTE, vect_location,
+				   "type is not supported for masking!\n");
+		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
+		}
+	      return;
+	    }
+	  stores.safe_push (stmt);
+	}
+      else if (is_gimple_call (stmt)
+	       && gimple_call_internal_p (stmt)
+	       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+		   || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+	/* Need to figure out the vectype for the new mask.  */
+	masked_ld_st.safe_push (stmt);
+      else if (is_gimple_call (stmt))
+	return;
+    }
+
+  /* Check that all vectorizable reductions can be converted to VCOND.  */
+  if (!LOOP_VINFO_REDUCTIONS (loop_vinfo).is_empty ())
+    {
+      unsigned i;
+      has_reductions = true;
+      for (i = 0; i < LOOP_VINFO_REDUCTIONS (loop_vinfo).length (); i++)
+	{
+	  machine_mode mode;
+
+	  stmt = LOOP_VINFO_REDUCTIONS (loop_vinfo)[i];
+	  stmt_info = vinfo_for_stmt (stmt);
+	  gcc_assert (stmt_info);
+	  if (PURE_SLP_STMT (stmt_info))
+	    return;
+	  gcc_assert (STMT_VINFO_VEC_STMT (stmt_info));
+	  stmt = STMT_VINFO_VEC_STMT (stmt_info);
+	  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+	    return;
+	  /* Only reductions with a binary operation are supported.  */
+	  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+	      != GIMPLE_BINARY_RHS)
+	    return;
+	  lhs = gimple_assign_lhs (stmt);
+	  vectype = TREE_TYPE (lhs);
+	  if (vf / TYPE_VECTOR_SUBPARTS (vectype) > 1)
+	    /* Not yet supported!  */
+	    return;
+	  n = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
+	  if (elem_size == 0)
+	    elem_size = n;
+	  else if (n != elem_size)
+	    /* Not yet supported!  */
+	    return;
+	  if (size == 0)
+	    size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
+	  mode = TYPE_MODE (vectype);
+	  if (get_vcond_icode (mode, mode, TYPE_UNSIGNED (vectype))
+	      == CODE_FOR_nothing)
+	    return;
+	}
+    }
+  /* Check masked loads/stores, if any.  */
+  if (!masked_ld_st.is_empty ())
+    {
+      unsigned i;
+      for (i = 0; i < masked_ld_st.length (); i++)
+	{
+	  tree mask;
+	  tree vectype;
+	  optab tab;
+	  stmt = masked_ld_st[i];
+	  mask = gimple_call_arg (stmt, 2);
+	  vectype = TREE_TYPE (mask);
+	  n = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+	  if (elem_size == 0)
+	    elem_size = n;
+	  else if (n != elem_size)
+	    /* Mask conversion is not supported yet!  */
+	    return;
+	  if (size == 0)
+	    size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
+	  /* Check that BIT_AND is supported on target.  */
+	  tab = optab_for_tree_code (BIT_AND_EXPR, vectype, optab_default);
+	  if (!tab)
+	    return;
+	  if (optab_handler (tab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	    return;
+	}
+    }
+
+  /* Generate the induction vector used to evaluate the mask.  */
+  vec_index = gen_vec_induction (loop_vinfo, elem_size, size);
+  if (!vec_index)
+    return;
+
+  /* Generate the mask vector used to mask the saved statements.  */
+  vec_mask = gen_mask_for_remainder (loop_vinfo, vec_index, size);
+  gcc_assert (vec_mask);
+
+  /* Convert vectorized loads to masked ones.  */
+  if (!loads.is_empty ())
+    convert_loads_to_masked (&loads, vec_mask);
+
+  /* Convert vectorized stores to masked ones.  */
+  if (!stores.is_empty ())
+    convert_stores_to_masked (&stores, vec_mask);
+
+  if (has_reductions)
+    convert_reductions (loop_vinfo, vec_mask);
+
+  if (!masked_ld_st.is_empty ())
+    fix_mask_for_masked_ld_st (&masked_ld_st, vec_mask);
+
+  /* Fix loop trip count.  */
+  fix_vec_loop_trip_count (loop_vinfo);
+
+  /* Fix up cfg to make scalar loop remainder unreachable.  */
+  isolate_remainder (loop_vinfo);
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "=== scalar remainder has been deleted ===\n");
+}
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 3e6fd35..f7366c1 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -559,6 +559,18 @@ vectorize_loops (void)
 	  }
       }
 
+  /* Try to combine vectorized loop and scalar remainder.  */
+  for (i = 1; i < vect_loops_num; i++)
+    {
+      loop_vec_info loop_vinfo;
+      loop = get_loop (cfun, i);
+      if (!loop || loop->inner)
+	continue;
+      loop_vinfo = (loop_vec_info) loop->aux;
+      if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
+	combine_vect_loop_remainder (loop_vinfo);
+    }
+
   for (i = 1; i < vect_loops_num; i++)
     {
       loop_vec_info loop_vinfo;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index bf01ded..e8865bc 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -230,6 +230,8 @@ typedef struct _loop_vec_info : public vec_info {
   tree num_iters;
   /* Number of iterations of the original loop.  */
   tree num_iters_unchanged;
+  /* Number of iterations of the vectorized loop.  */
+  tree num_iters_vect_loop;
 
   /* Threshold of number of iterations below which vectorzation will not be
      performed. It is calculated from MIN_PROFITABLE_ITERS and
@@ -335,6 +337,7 @@ typedef struct _loop_vec_info : public vec_info {
 #define LOOP_VINFO_BBS(L)                  (L)->bbs
 #define LOOP_VINFO_NITERSM1(L)             (L)->num_itersm1
 #define LOOP_VINFO_NITERS(L)               (L)->num_iters
+#define LOOP_VINFO_NITERS_VECT_LOOP(L)     (L)->num_iters_vect_loop
 /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
    prologue peeling retain total unchanged scalar loop iterations for
    cost model.  */
@@ -994,6 +997,7 @@ extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *,
 			       vec<tree> *, slp_tree, int);
 extern tree vect_gen_perm_mask_any (tree, const unsigned char *);
 extern tree vect_gen_perm_mask_checked (tree, const unsigned char *);
+extern void combine_vect_loop_remainder (loop_vec_info);
 
 /* In tree-vect-data-refs.c.  */
 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
