|
14 | 14 | import torch
|
15 | 15 |
|
16 | 16 | from torch import nn
|
17 |
| -from torchrec.ir.schema import EBCMetadata, EmbeddingBagConfigMetadata |
| 17 | +from torchrec.ir.schema import ( |
| 18 | + EBCMetadata, |
| 19 | + EmbeddingBagConfigMetadata, |
| 20 | + FPEBCMetadata, |
| 21 | + PositionWeightedModuleCollectionMetadata, |
| 22 | + PositionWeightedModuleMetadata, |
| 23 | +) |
18 | 24 |
|
19 | 25 | from torchrec.ir.types import SerializerInterface
|
20 | 26 | from torchrec.modules.embedding_configs import DataType, EmbeddingBagConfig, PoolingType
|
21 | 27 | from torchrec.modules.embedding_modules import EmbeddingBagCollection
|
| 28 | +from torchrec.modules.feature_processor_ import ( |
| 29 | + FeatureProcessor, |
| 30 | + FeatureProcessorsCollection, |
| 31 | + PositionWeightedModule, |
| 32 | + PositionWeightedModuleCollection, |
| 33 | +) |
| 34 | +from torchrec.modules.fp_embedding_modules import FeatureProcessedEmbeddingBagCollection |
22 | 35 |
|
23 | 36 | logger: logging.Logger = logging.getLogger(__name__)
|
24 | 37 |
|
@@ -71,7 +84,7 @@ def get_deserialized_device(
|
71 | 84 |
|
72 | 85 | class EBCJsonSerializer(SerializerInterface):
|
73 | 86 | """
|
74 |
| - Serializer for torch.export IR using thrift. |
| 87 | + Serializer for torch.export IR using json. |
75 | 88 | """
|
76 | 89 |
|
77 | 90 | @classmethod
|
@@ -132,13 +145,155 @@ def deserialize(
|
132 | 145 | )
|
133 | 146 |
|
134 | 147 |
|
| 148 | +class PWMJsonSerializer(SerializerInterface): |
| 149 | + """ |
| 150 | + Serializer for torch.export IR using json. |
| 151 | + """ |
| 152 | + |
| 153 | + @classmethod |
| 154 | + def serialize(cls, module: nn.Module) -> torch.Tensor: |
| 155 | + if not isinstance(module, PositionWeightedModule): |
| 156 | + raise ValueError( |
| 157 | + f"Expected module to be of type PositionWeightedModule, got {type(module)}" |
| 158 | + ) |
| 159 | + metadata = PositionWeightedModuleMetadata( |
| 160 | + max_feature_length=module.position_weight.shape[0], |
| 161 | + ) |
| 162 | + return torch.frombuffer( |
| 163 | + json.dumps(metadata.__dict__).encode(), dtype=torch.uint8 |
| 164 | + ) |
| 165 | + |
| 166 | + @classmethod |
| 167 | + def deserialize( |
| 168 | + cls, |
| 169 | + input: torch.Tensor, |
| 170 | + typename: str, |
| 171 | + device: Optional[torch.device] = None, |
| 172 | + children: Dict[str, nn.Module] = {}, |
| 173 | + ) -> nn.Module: |
| 174 | + if typename != "PositionWeightedModule": |
| 175 | + raise ValueError( |
| 176 | + f"Expected typename to be PositionWeightedModule, got {typename}" |
| 177 | + ) |
| 178 | + raw_bytes = input.numpy().tobytes() |
| 179 | + metadata = json.loads(raw_bytes) |
| 180 | + return PositionWeightedModule(metadata["max_feature_length"], device) |
| 181 | + |
| 182 | + |
| 183 | +class PWMCJsonSerializer(SerializerInterface): |
| 184 | + """ |
| 185 | + Serializer for torch.export IR using json. |
| 186 | + """ |
| 187 | + |
| 188 | + @classmethod |
| 189 | + def serialize(cls, module: nn.Module) -> torch.Tensor: |
| 190 | + if not isinstance(module, PositionWeightedModuleCollection): |
| 191 | + raise ValueError( |
| 192 | + f"Expected module to be of type PositionWeightedModuleCollection, got {type(module)}" |
| 193 | + ) |
| 194 | + metadata = PositionWeightedModuleCollectionMetadata( |
| 195 | + max_feature_lengths=[ # convert to list of tuples to preserve the order |
| 196 | + (feature, len) for feature, len in module.max_feature_lengths.items() |
| 197 | + ], |
| 198 | + ) |
| 199 | + return torch.frombuffer( |
| 200 | + json.dumps(metadata.__dict__).encode(), dtype=torch.uint8 |
| 201 | + ) |
| 202 | + |
| 203 | + @classmethod |
| 204 | + def deserialize( |
| 205 | + cls, |
| 206 | + input: torch.Tensor, |
| 207 | + typename: str, |
| 208 | + device: Optional[torch.device] = None, |
| 209 | + children: Dict[str, nn.Module] = {}, |
| 210 | + ) -> nn.Module: |
| 211 | + if typename != "PositionWeightedModuleCollection": |
| 212 | + raise ValueError( |
| 213 | + f"Expected typename to be PositionWeightedModuleCollection, got {typename}" |
| 214 | + ) |
| 215 | + raw_bytes = input.numpy().tobytes() |
| 216 | + metadata = PositionWeightedModuleCollectionMetadata(**json.loads(raw_bytes)) |
| 217 | + max_feature_lengths = { |
| 218 | + feature: len for feature, len in metadata.max_feature_lengths |
| 219 | + } |
| 220 | + return PositionWeightedModuleCollection(max_feature_lengths, device) |
| 221 | + |
| 222 | + |
| 223 | +class FPEBCJsonSerializer(SerializerInterface): |
| 224 | + """ |
| 225 | + Serializer for torch.export IR using json. |
| 226 | + """ |
| 227 | + |
| 228 | + @classmethod |
| 229 | + def requires_children(cls, typename: str) -> bool: |
| 230 | + return True |
| 231 | + |
| 232 | + @classmethod |
| 233 | + def serialize( |
| 234 | + cls, |
| 235 | + module: nn.Module, |
| 236 | + ) -> torch.Tensor: |
| 237 | + if not isinstance(module, FeatureProcessedEmbeddingBagCollection): |
| 238 | + raise ValueError( |
| 239 | + f"Expected module to be of type FeatureProcessedEmbeddingBagCollection, got {type(module)}" |
| 240 | + ) |
| 241 | + elif isinstance(module._feature_processors, FeatureProcessorsCollection): |
| 242 | + metadata = FPEBCMetadata( |
| 243 | + is_fp_collection=True, |
| 244 | + feature_list=[], |
| 245 | + ) |
| 246 | + else: |
| 247 | + metadata = FPEBCMetadata( |
| 248 | + is_fp_collection=False, |
| 249 | + feature_list=list(module._feature_processors.keys()), |
| 250 | + ) |
| 251 | + |
| 252 | + return torch.frombuffer( |
| 253 | + json.dumps(metadata.__dict__).encode(), dtype=torch.uint8 |
| 254 | + ) |
| 255 | + |
| 256 | + @classmethod |
| 257 | + def deserialize( |
| 258 | + cls, |
| 259 | + input: torch.Tensor, |
| 260 | + typename: str, |
| 261 | + device: Optional[torch.device] = None, |
| 262 | + children: Dict[str, nn.Module] = {}, |
| 263 | + ) -> nn.Module: |
| 264 | + if typename != "FeatureProcessedEmbeddingBagCollection": |
| 265 | + raise ValueError( |
| 266 | + f"Expected typename to be EmbeddingBagCollection, got {typename}" |
| 267 | + ) |
| 268 | + raw_bytes = input.numpy().tobytes() |
| 269 | + metadata = FPEBCMetadata(**json.loads(raw_bytes.decode())) |
| 270 | + if metadata.is_fp_collection: |
| 271 | + feature_processors = children["_feature_processors"] |
| 272 | + assert isinstance(feature_processors, FeatureProcessorsCollection) |
| 273 | + else: |
| 274 | + feature_processors: dict[str, FeatureProcessor] = {} |
| 275 | + for feature in metadata.feature_list: |
| 276 | + fp = children[f"_feature_processors.{feature}"] |
| 277 | + assert isinstance(fp, FeatureProcessor) |
| 278 | + feature_processors[feature] = fp |
| 279 | + ebc = children["_embedding_bag_collection"] |
| 280 | + assert isinstance(ebc, EmbeddingBagCollection) |
| 281 | + return FeatureProcessedEmbeddingBagCollection( |
| 282 | + ebc, |
| 283 | + feature_processors, |
| 284 | + ) |
| 285 | + |
| 286 | + |
135 | 287 | class JsonSerializer(SerializerInterface):
|
136 | 288 | """
|
137 |
| - Serializer for torch.export IR using thrift. |
| 289 | + Serializer for torch.export IR using json. |
138 | 290 | """
|
139 | 291 |
|
140 | 292 | module_to_serializer_cls: Dict[str, Type[SerializerInterface]] = {
|
141 | 293 | "EmbeddingBagCollection": EBCJsonSerializer,
|
| 294 | + "FeatureProcessedEmbeddingBagCollection": FPEBCJsonSerializer, |
| 295 | + "PositionWeightedModule": PWMJsonSerializer, |
| 296 | + "PositionWeightedModuleCollection": PWMCJsonSerializer, |
142 | 297 | }
|
143 | 298 |
|
144 | 299 | @classmethod
|
|
0 commit comments