tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
if (consumer->opcode() == HloOpcode::kDot) {
// In the general case we call out to optimized "black box" GEMM routines
// for Dot, which precludes fusion. However, in very specific cases, we try
// to fuse Dot operations by generating an elemental dot implementation.
//
// We need to be careful and conservative here since any benefit we get from
// fusion can easily be overshadowed by the overhead of a naive GEMM
// algorithm in the IR.