import logging

from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

logging.basicConfig(
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
pretrained_model_dir = "path/to/pretrained-model"  # placeholder: local path or HF hub id of the base model
quantized_model_dir = "path/to/quantized-model"    # placeholder: output directory for the quantized weights
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
# Build calibration examples; `ds` is assumed to be an iterable of calibration text strings.
examples = []
for text in ds:
    examples.append(tokenizer(text))
quantize_config = BaseQuantizeConfig(
    bits=4,          # quantize the model to 4-bit precision
    group_size=128,  # quantization group size; 128 is the commonly recommended value
    desc_act=False,  # False speeds up inference, at a possible small cost in perplexity
)
# Load the unquantized model with the quantization config attached.
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)

# Run GPTQ quantization using the first 1000 calibration examples.
model.quantize(examples[:1000])

# Save the quantized weights (PyTorch .bin format).
model.save_quantized(quantized_model_dir)

# Alternatively, save them in the safetensors format.
model.save_quantized(quantized_model_dir, use_safetensors=True)
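
# A minimal sketch of loading the saved model back for inference, not part of the
# original snippet; it assumes a CUDA device is available, reuses the tokenizer
# from above, and uses an arbitrary example prompt.
quantized_model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device="cuda:0")
prompt_ids = tokenizer("Hello, my name is", return_tensors="pt").to("cuda:0")
print(tokenizer.decode(quantized_model.generate(**prompt_ids)[0]))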