Skip to content

Commit fc703f0

Browse files
Deploying to gh-pages from @ d2ca75e 🚀
1 parent c07e41a commit fc703f0

5 files changed

+85
-42
lines changed

_modules/fbgemm_gpu/split_table_batched_embeddings_ops_inference.html

+27-7
Original file line numberDiff line numberDiff line change
@@ -458,13 +458,15 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
458458
<span class="c1"># pyre-ignore-all-errors[56]</span>
459459

460460
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
461+
<span class="kn">import</span><span class="w"> </span><span class="nn">uuid</span>
461462
<span class="kn">from</span><span class="w"> </span><span class="nn">itertools</span><span class="w"> </span><span class="kn">import</span> <span class="n">accumulate</span>
462463
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Union</span>
463464

464465
<span class="kn">import</span><span class="w"> </span><span class="nn">fbgemm_gpu</span> <span class="c1"># noqa: F401</span>
465466
<span class="kn">import</span><span class="w"> </span><span class="nn">torch</span> <span class="c1"># usort:skip</span>
466467
<span class="kn">from</span><span class="w"> </span><span class="nn">torch</span><span class="w"> </span><span class="kn">import</span> <span class="n">nn</span><span class="p">,</span> <span class="n">Tensor</span> <span class="c1"># usort:skip</span>
467468

469+
<span class="kn">from</span><span class="w"> </span><span class="nn">fbgemm_gpu.config</span><span class="w"> </span><span class="kn">import</span> <span class="n">FeatureGateName</span>
468470
<span class="kn">from</span><span class="w"> </span><span class="nn">fbgemm_gpu.split_embedding_configs</span><span class="w"> </span><span class="kn">import</span> <span class="n">sparse_type_to_int</span><span class="p">,</span> <span class="n">SparseType</span>
469471
<span class="kn">from</span><span class="w"> </span><span class="nn">fbgemm_gpu.split_table_batched_embeddings_ops_common</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
470472
<span class="n">BoundsCheckMode</span><span class="p">,</span>
@@ -817,6 +819,10 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
817819
<span class="n">indices_dtype</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">int32</span><span class="p">,</span> <span class="c1"># Used for construction of the remap_indices tensors. Should match the dtype of the indices passed in the forward() call (INT32 or INT64).</span>
818820
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># noqa C901 # tuple of (rows, dims,)</span>
819821
<span class="nb">super</span><span class="p">(</span><span class="n">IntNBitTableBatchedEmbeddingBagsCodegen</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
822+
<span class="bp">self</span><span class="o">.</span><span class="n">uuid</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">())</span>
823+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
824+
<span class="sa">f</span><span class="s2">&quot;Feature Gates: </span><span class="si">{</span><span class="p">[(</span><span class="n">feature</span><span class="o">.</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">feature</span><span class="o">.</span><span class="n">is_enabled</span><span class="p">())</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">feature</span><span class="w"> </span><span class="ow">in</span><span class="w"> </span><span class="n">FeatureGateName</span><span class="p">]</span><span class="si">}</span><span class="s2">&quot;</span>
825+
<span class="p">)</span>
820826

821827
<span class="c1"># 64 for AMD</span>
822828
<span class="k">if</span> <span class="n">cache_assoc</span> <span class="o">==</span> <span class="mi">32</span> <span class="ow">and</span> <span class="n">torch</span><span class="o">.</span><span class="n">version</span><span class="o">.</span><span class="n">hip</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
@@ -1072,6 +1078,20 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
10721078
<span class="bp">self</span><span class="o">.</span><span class="n">fp8_exponent_bits</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
10731079
<span class="bp">self</span><span class="o">.</span><span class="n">fp8_exponent_bias</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
10741080

1081+
<span class="nd">@torch</span><span class="o">.</span><span class="n">jit</span><span class="o">.</span><span class="n">ignore</span>
1082+
<span class="k">def</span><span class="w"> </span><span class="nf">log</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
1083+
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
1084+
<span class="sd"> Log with TBE id prefix to distinguish between multiple TBE instances</span>
1085+
<span class="sd"> per process</span>
1086+
1087+
<span class="sd"> Args:</span>
1088+
<span class="sd"> msg (str): The message to print</span>
1089+
1090+
<span class="sd"> Returns:</span>
1091+
<span class="sd"> None</span>
1092+
<span class="sd"> &quot;&quot;&quot;</span>
1093+
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;[TBE=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">uuid</span><span class="si">}</span><span class="s2">] </span><span class="si">{</span><span class="n">msg</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
1094+
10751095
<span class="k">def</span><span class="w"> </span><span class="nf">get_cache_miss_counter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
10761096
<span class="c1"># cache_miss_counter[0]: cache_miss_forward_count which records the total number of forwards which has at least one cache miss</span>
10771097
<span class="c1"># cache_miss_counter[1]: unique_cache_miss_count which records to total number of unique (dedup) cache misses</span>
@@ -1120,17 +1140,17 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
11201140
<span class="k">assert</span> <span class="p">(</span>
11211141
<span class="bp">self</span><span class="o">.</span><span class="n">record_cache_metrics</span><span class="o">.</span><span class="n">record_cache_miss_counter</span>
11221142
<span class="p">),</span> <span class="s2">&quot;record_cache_miss_counter should be true to access counter values&quot;</span>
1123-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1143+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
11241144
<span class="sa">f</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span>
11251145
<span class="sa">f</span><span class="s2">&quot;Miss counter value [0] - # of miss occured iters : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2">, </span><span class="se">\n</span><span class="s2">&quot;</span>
11261146
<span class="sa">f</span><span class="s2">&quot;Miss counter value [1] - # of unique misses : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2">, </span><span class="se">\n</span><span class="s2">&quot;</span>
11271147
<span class="sa">f</span><span class="s2">&quot;Miss counter value [2] - # of unique requested indices : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="si">}</span><span class="s2">, </span><span class="se">\n</span><span class="s2">&quot;</span>
11281148
<span class="sa">f</span><span class="s2">&quot;Miss counter value [3] - # of total requested indices : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="si">}</span><span class="s2">, &quot;</span>
11291149
<span class="p">)</span>
1130-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1150+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
11311151
<span class="sa">f</span><span class="s2">&quot;unique_miss_rate using counter : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="si">}</span><span class="s2">, </span><span class="se">\n</span><span class="s2">&quot;</span>
11321152
<span class="p">)</span>
1133-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1153+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
11341154
<span class="sa">f</span><span class="s2">&quot;total_miss_rate using counter : </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">cache_miss_counter</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="si">}</span><span class="s2">, </span><span class="se">\n</span><span class="s2">&quot;</span>
11351155
<span class="p">)</span>
11361156

@@ -1145,7 +1165,7 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
11451165
<span class="bp">self</span><span class="o">.</span><span class="n">gather_uvm_cache_stats</span>
11461166
<span class="p">),</span> <span class="s2">&quot;gather_uvm_cache_stats should be set to true to access uvm cache stats.&quot;</span>
11471167
<span class="n">uvm_cache_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">uvm_cache_stats</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
1148-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1168+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
11491169
<span class="sa">f</span><span class="s2">&quot;N_called: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
11501170
<span class="sa">f</span><span class="s2">&quot;N_requested_indices: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
11511171
<span class="sa">f</span><span class="s2">&quot;N_unique_indices: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
@@ -1154,7 +1174,7 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
11541174
<span class="sa">f</span><span class="s2">&quot;N_conflict_misses: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
11551175
<span class="p">)</span>
11561176
<span class="k">if</span> <span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
1157-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1177+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
11581178
<span class="sa">f</span><span class="s2">&quot;unique indices / requested indices: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
11591179
<span class="sa">f</span><span class="s2">&quot;unique misses / requested indices: </span><span class="si">{</span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="n">uvm_cache_stats</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
11601180
<span class="p">)</span>
@@ -1660,7 +1680,7 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
16601680
<span class="k">assert</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_cpu</span>
16611681
<span class="k">if</span> <span class="n">enforce_hbm</span><span class="p">:</span>
16621682
<span class="k">if</span> <span class="ow">not</span> <span class="n">torch</span><span class="o">.</span><span class="n">jit</span><span class="o">.</span><span class="n">is_scripting</span><span class="p">():</span>
1663-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Enforce hbm for the cache location&quot;</span><span class="p">)</span>
1683+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="s2">&quot;Enforce hbm for the cache location&quot;</span><span class="p">)</span>
16641684
<span class="bp">self</span><span class="o">.</span><span class="n">weights_uvm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span>
16651685
<span class="n">uvm_size</span><span class="p">,</span>
16661686
<span class="n">device</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">current_device</span><span class="p">,</span>
@@ -1800,7 +1820,7 @@ <h1>Source code for fbgemm_gpu.split_table_batched_embeddings_ops_inference</h1>
18001820
<span class="k">if</span> <span class="n">cache_algorithm</span> <span class="o">==</span> <span class="n">CacheAlgorithm</span><span class="o">.</span><span class="n">LFU</span><span class="p">:</span>
18011821
<span class="k">assert</span> <span class="n">cache_sets</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="o">**</span><span class="mi">24</span> <span class="o">-</span> <span class="mi">1</span>
18021822
<span class="n">cache_size</span> <span class="o">=</span> <span class="n">cache_sets</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">cache_assoc</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_D_cache</span>
1803-
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
1823+
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="p">(</span>
18041824
<span class="sa">f</span><span class="s2">&quot;Using on-device cache with admission algorithm &quot;</span>
18051825
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">cache_algorithm</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="n">cache_sets</span><span class="si">}</span><span class="s2"> sets, &quot;</span>
18061826
<span class="sa">f</span><span class="s2">&quot;cache_load_factor: </span><span class="si">{</span><span class="n">cache_load_factor</span><span class="w"> </span><span class="si">:</span><span class="s2"> .3f</span><span class="si">}</span><span class="s2">, &quot;</span>

0 commit comments

Comments (0)