From c611a36b6c908753131cc6eba416c1f3772922d6 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Mon, 4 May 2026 04:58:08 -0700 Subject: [PATCH 01/24] test: add unit tests for GCG core algorithm components Add 26 new unit tests covering: - get_filtered_cands: filtering, clamping, padding behavior - target_loss / control_loss: shape, finiteness, loss ordering - sample_control: shape, vocab bounds, single-position changes, non-ASCII filtering - _build_params: ConfigDict construction from kwargs - _apply_target_augmentation: length preservation, modification, seed reproducibility - _create_attack: transfer flag routing (Progressive vs Individual) - Embedding helpers: error handling for unknown model types - PromptManager init: validation of goals/targets - EvaluateAttack init: worker count validation Total GCG test count: 24 -> 50 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/gcg/test_gcg_core.py | 528 ++++++++++++++++++ 1 file changed, 528 insertions(+) create mode 100644 tests/unit/auxiliary_attacks/gcg/test_gcg_core.py diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py new file mode 100644 index 0000000000..d21074e1c2 --- /dev/null +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -0,0 +1,528 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +attack_manager_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.base.attack_manager", + reason="GCG optional dependencies (torch, mlflow, etc.) not installed", +) +torch = pytest.importorskip("torch", reason="torch not installed") + +MultiPromptAttack = attack_manager_mod.MultiPromptAttack +get_embedding_layer = attack_manager_mod.get_embedding_layer +get_embedding_matrix = attack_manager_mod.get_embedding_matrix +get_embeddings = attack_manager_mod.get_embeddings + +gcg_attack_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack", + reason="GCG optional dependencies not installed", +) +GCGPromptManager = gcg_attack_mod.GCGPromptManager +token_gradients = gcg_attack_mod.token_gradients + + +class TestGetFilteredCands: + """Tests for MultiPromptAttack.get_filtered_cands.""" + + def _make_attack_with_worker(self, *, vocab_size: int = 100) -> tuple: + """Create a minimal MultiPromptAttack with a mocked worker for get_filtered_cands.""" + attack = object.__new__(MultiPromptAttack) + mock_worker = MagicMock() + mock_worker.tokenizer.vocab_size = vocab_size + # Mock decode to return a simple string representation + mock_worker.tokenizer.decode.side_effect = lambda ids, **kwargs: "tok_" + "_".join(str(t) for t in ids.tolist()) + # Mock tokenizer call to return input_ids matching the length of input + mock_worker.tokenizer.side_effect = lambda text, **kwargs: MagicMock( + input_ids=list(range(len(text.split("_")) - 1)) + ) + # "!" 
token maps to id 0 + mock_worker.tokenizer.__call__ = mock_worker.tokenizer.side_effect + first_call = MagicMock() + first_call.input_ids = [0] + mock_worker.tokenizer.return_value = first_call + attack.workers = [mock_worker] + return attack, mock_worker + + def test_returns_list_of_strings(self) -> None: + """get_filtered_cands should return a list of decoded strings.""" + attack, worker = self._make_attack_with_worker() + # Simple decode: each row -> "tok_X_Y" + worker.tokenizer.decode.side_effect = lambda ids, **kwargs: f"ctrl_{ids[0]}" + worker.tokenizer.side_effect = lambda text, **kwargs: MagicMock(input_ids=[0]) + + cands = torch.tensor([[5], [6], [7]]) + result = attack.get_filtered_cands(0, cands, filter_cand=False) + assert isinstance(result, list) + assert len(result) == 3 + assert all(isinstance(s, str) for s in result) + + def test_filter_cand_false_returns_all(self) -> None: + """With filter_cand=False, all candidates should be returned.""" + attack, worker = self._make_attack_with_worker() + worker.tokenizer.decode.side_effect = lambda ids, **kwargs: f"ctrl_{ids[0]}" + # Reset side_effect so return_value is used for tokenizer("!") call + worker.tokenizer.side_effect = None + worker.tokenizer.return_value = MagicMock(input_ids=[0]) + + cands = torch.tensor([[5], [6], [7]]) + result = attack.get_filtered_cands(0, cands, filter_cand=False) + assert len(result) == 3 + + def test_clamps_out_of_vocab_tokens(self) -> None: + """Tokens above vocab_size should be replaced.""" + attack, worker = self._make_attack_with_worker(vocab_size=10) + worker.tokenizer.decode.side_effect = lambda ids, **kwargs: f"ctrl_{ids[0]}" + worker.tokenizer.side_effect = lambda text, **kwargs: MagicMock(input_ids=[0]) + + cands = torch.tensor([[5], [15], [7]]) # 15 > vocab_size=10 + attack.get_filtered_cands(0, cands, filter_cand=False) + # After clamping, the out-of-range token should have been replaced + assert cands[1][0].item() != 15 + + def test_filter_cand_true_pads_to_batch_size(self) -> None: + """With filter_cand=True, result should be padded to match input batch size.""" + attack, worker = self._make_attack_with_worker() + # Make all candidates decode to the same as curr_control so they get filtered out + worker.tokenizer.decode.side_effect = lambda ids, **kwargs: "same_control" + worker.tokenizer.side_effect = lambda text, **kwargs: MagicMock(input_ids=[0]) + + # But make the last one different + decode_results = ["same_control", "same_control", "different"] + call_count = [0] + + def decode_fn(ids, **kwargs): + idx = min(call_count[0], len(decode_results) - 1) + call_count[0] += 1 + return decode_results[idx] + + worker.tokenizer.decode.side_effect = decode_fn + worker.tokenizer.side_effect = lambda text, **kwargs: MagicMock(input_ids=[0]) + + cands = torch.tensor([[1], [2], [3]]) + result = attack.get_filtered_cands(0, cands, filter_cand=True, curr_control="same_control") + # Should always return exactly len(cands) results + assert len(result) == 3 + + +class TestTargetAndControlLoss: + """Tests for AttackPrompt.target_loss and control_loss.""" + + def test_target_loss_returns_correct_shape(self) -> None: + """target_loss should return tensor of shape (batch, target_len).""" + AttackPrompt = attack_manager_mod.AttackPrompt + prompt = object.__new__(AttackPrompt) + prompt._target_slice = slice(5, 8) # 3 target tokens + + batch_size = 4 + seq_len = 10 + vocab_size = 50 + logits = torch.randn(batch_size, seq_len, vocab_size) + ids = torch.randint(0, vocab_size, (batch_size, seq_len)) + + loss = 
prompt.target_loss(logits, ids) + assert loss.shape == (batch_size, 3) + + def test_target_loss_is_finite(self) -> None: + """target_loss should always return finite values.""" + AttackPrompt = attack_manager_mod.AttackPrompt + prompt = object.__new__(AttackPrompt) + prompt._target_slice = slice(3, 6) + + logits = torch.randn(2, 8, 30) + ids = torch.randint(0, 30, (2, 8)) + + loss = prompt.target_loss(logits, ids) + assert torch.isfinite(loss).all() + + def test_control_loss_returns_correct_shape(self) -> None: + """control_loss should return tensor of shape (batch, control_len).""" + AttackPrompt = attack_manager_mod.AttackPrompt + prompt = object.__new__(AttackPrompt) + prompt._control_slice = slice(2, 5) # 3 control tokens + + batch_size = 4 + seq_len = 10 + vocab_size = 50 + logits = torch.randn(batch_size, seq_len, vocab_size) + ids = torch.randint(0, vocab_size, (batch_size, seq_len)) + + loss = prompt.control_loss(logits, ids) + assert loss.shape == (batch_size, 3) + + def test_control_loss_is_finite(self) -> None: + """control_loss should always return finite values.""" + AttackPrompt = attack_manager_mod.AttackPrompt + prompt = object.__new__(AttackPrompt) + prompt._control_slice = slice(2, 5) + + logits = torch.randn(2, 8, 30) + ids = torch.randint(0, 30, (2, 8)) + + loss = prompt.control_loss(logits, ids) + assert torch.isfinite(loss).all() + + def test_target_loss_higher_for_wrong_predictions(self) -> None: + """Loss should be higher when logits don't predict the correct target tokens.""" + AttackPrompt = attack_manager_mod.AttackPrompt + prompt = object.__new__(AttackPrompt) + prompt._target_slice = slice(3, 5) + + vocab_size = 10 + ids = torch.zeros(1, 6, dtype=torch.long) + ids[0, 3] = 2 + ids[0, 4] = 3 + + # Logits that perfectly predict the target + good_logits = torch.full((1, 6, vocab_size), -10.0) + good_logits[0, 2, 2] = 10.0 # predicts token 2 at position 3 + good_logits[0, 3, 3] = 10.0 # predicts token 3 at position 4 + + # Logits that predict wrong tokens + bad_logits = torch.full((1, 6, vocab_size), -10.0) + bad_logits[0, 2, 7] = 10.0 # predicts wrong token + bad_logits[0, 3, 8] = 10.0 # predicts wrong token + + good_loss = prompt.target_loss(good_logits, ids).mean() + bad_loss = prompt.target_loss(bad_logits, ids).mean() + assert bad_loss > good_loss + + +class TestSampleControl: + """Tests for GCGPromptManager.sample_control.""" + + def _make_prompt_manager(self, *, n_control_tokens: int = 5, vocab_size: int = 50) -> GCGPromptManager: + """Create a minimal GCGPromptManager with stubbed internals for sample_control testing.""" + pm = object.__new__(GCGPromptManager) + pm._nonascii_toks = torch.tensor([]) + # Simulate control_toks property + pm._prompts = [MagicMock()] + pm._prompts[0].control_toks = torch.randint(0, vocab_size, (n_control_tokens,)) + return pm + + def test_returns_correct_shape(self) -> None: + """sample_control should return (batch_size, n_control_tokens) tensor.""" + n_control = 5 + vocab_size = 50 + batch_size = 16 + pm = self._make_prompt_manager(n_control_tokens=n_control, vocab_size=vocab_size) + + grad = torch.randn(n_control, vocab_size) + result = pm.sample_control(grad, batch_size, topk=10) + assert result.shape == (batch_size, n_control) + + def test_output_tokens_within_vocab(self) -> None: + """All sampled tokens should be within vocabulary range.""" + n_control = 5 + vocab_size = 50 + batch_size = 32 + pm = self._make_prompt_manager(n_control_tokens=n_control, vocab_size=vocab_size) + + grad = torch.randn(n_control, vocab_size) + 
result = pm.sample_control(grad, batch_size, topk=10) + assert (result >= 0).all() + assert (result < vocab_size).all() + + def test_each_candidate_differs_in_one_position(self) -> None: + """Each candidate should differ from the original in exactly one position.""" + n_control = 10 + vocab_size = 50 + batch_size = 8 + pm = self._make_prompt_manager(n_control_tokens=n_control, vocab_size=vocab_size) + + grad = torch.randn(n_control, vocab_size) + original_toks = pm._prompts[0].control_toks.clone() + result = pm.sample_control(grad, batch_size, topk=10) + + for i in range(batch_size): + diffs = (result[i] != original_toks.to(result.device)).sum().item() + # Each candidate changes exactly 1 position + assert diffs == 1, f"Candidate {i} differs in {diffs} positions, expected 1" + + def test_non_ascii_filtering(self) -> None: + """When allow_non_ascii=False, the newly sampled token should not be non-ASCII. + + Note: sample_control only changes ONE position per candidate, so unchanged positions + may still contain non-ASCII tokens from the original control. We verify that the + *changed* position doesn't use a non-ASCII token. + """ + n_control = 5 + vocab_size = 20 + batch_size = 64 + pm = self._make_prompt_manager(n_control_tokens=n_control, vocab_size=vocab_size) + # Use only ASCII tokens in original control + pm._prompts[0].control_toks = torch.tensor([0, 1, 2, 3, 4]) + # Mark tokens 15-19 as non-ASCII + pm._nonascii_toks = torch.tensor([15, 16, 17, 18, 19]) + + # Create gradient that strongly favors non-ASCII tokens + grad = torch.zeros(n_control, vocab_size) + grad[:, 15:20] = -100.0 # Negative gradient = top candidates after negation + + result = pm.sample_control(grad, batch_size, topk=5, allow_non_ascii=False) + original = pm._prompts[0].control_toks + non_ascii_set = {15, 16, 17, 18, 19} + + for i in range(batch_size): + # Find the position that changed + diffs = (result[i] != original.to(result.device)) + changed_positions = diffs.nonzero(as_tuple=True)[0] + for pos in changed_positions: + new_tok = result[i, pos].item() + assert new_tok not in non_ascii_set, ( + f"Candidate {i} position {pos}: sampled non-ASCII token {new_tok}" + ) + + +class TestBuildParams: + """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._build_params.""" + + def test_builds_config_dict_from_kwargs(self) -> None: + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + params = Generator._build_params( + n_steps=100, + batch_size=256, + model_name="test_model", + ) + assert params.n_steps == 100 + assert params.batch_size == 256 + assert params.model_name == "test_model" + + def test_all_kwargs_become_attributes(self) -> None: + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + kwargs = {"a": 1, "b": "hello", "c": [1, 2, 3], "d": True} + params = Generator._build_params(**kwargs) + for key, value in kwargs.items(): + assert getattr(params, key) == value + + +class TestApplyTargetAugmentation: + """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._apply_target_augmentation.""" + + def test_returns_same_length_lists(self) -> None: + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = 
train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + train = ["Sure, here is a bomb", "Sure, here is a virus"] + test = ["Sure, here is a weapon"] + + result_train, result_test = Generator._apply_target_augmentation( + train_targets=train, + test_targets=test, + ) + assert len(result_train) == len(train) + assert len(result_test) == len(test) + + def test_augmentation_modifies_targets(self) -> None: + """At least some targets should be modified by augmentation.""" + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + np.random.seed(42) + targets = ["Sure, here is how to do it"] * 100 + + result, _ = Generator._apply_target_augmentation( + train_targets=targets, + test_targets=[], + ) + # With 100 targets and 50% chance of each transform, we should see some changes + num_changed = sum(1 for orig, aug in zip(targets, result) if orig != aug) + assert num_changed > 0, "Expected at least some targets to be augmented" + + def test_augmentation_is_seeded_reproducible(self) -> None: + """Same seed should produce same augmentation.""" + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + targets = ["Sure, here is how to do it"] * 20 + + np.random.seed(123) + result1, _ = Generator._apply_target_augmentation(train_targets=targets, test_targets=[]) + + np.random.seed(123) + result2, _ = Generator._apply_target_augmentation(train_targets=targets, test_targets=[]) + + assert result1 == result2 + + +class TestCreateAttack: + """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._create_attack.""" + + def test_transfer_true_creates_progressive(self) -> None: + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + ProgressiveMultiPromptAttack = attack_manager_mod.ProgressiveMultiPromptAttack + + params = Generator._build_params( + transfer=True, + progressive_models=True, + progressive_goals=True, + control_init="! ! !", + result_prefix="test", + gbda_deterministic=True, + learning_rate=0.01, + batch_size=512, + n_steps=100, + ) + + mock_worker = MagicMock() + mock_worker.model.name_or_path = "test-model" + mock_worker.tokenizer.name_or_path = "test-tokenizer" + mock_worker.conv_template.name = "test-template" + + managers = { + "AP": MagicMock(), + "PM": MagicMock(), + "MPA": MagicMock(return_value=MagicMock()), + } + + attack = Generator._create_attack( + params=params, + managers=managers, + train_goals=["goal1"], + train_targets=["target1"], + test_goals=[], + test_targets=[], + workers=[mock_worker], + test_workers=[], + ) + assert isinstance(attack, ProgressiveMultiPromptAttack) + + def test_transfer_false_creates_individual(self) -> None: + train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", + ) + Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + IndividualPromptAttack = attack_manager_mod.IndividualPromptAttack + + params = Generator._build_params( + transfer=False, + control_init="! ! 
!", + result_prefix="test", + gbda_deterministic=True, + learning_rate=0.01, + batch_size=512, + n_steps=100, + ) + + mock_worker = MagicMock() + mock_worker.model.name_or_path = "test-model" + mock_worker.tokenizer.name_or_path = "test-tokenizer" + mock_worker.conv_template.name = "test-template" + + managers = { + "AP": MagicMock(), + "PM": MagicMock(), + "MPA": MagicMock(return_value=MagicMock()), + } + + attack = Generator._create_attack( + params=params, + managers=managers, + train_goals=["goal1"], + train_targets=["target1"], + test_goals=[], + test_targets=[], + workers=[mock_worker], + test_workers=[], + ) + assert isinstance(attack, IndividualPromptAttack) + + +class TestEmbeddingHelpers: + """Tests for get_embedding_layer, get_embedding_matrix, get_embeddings.""" + + def test_get_embedding_layer_raises_for_unknown_model(self) -> None: + """Should raise ValueError for unsupported model types.""" + mock_model = MagicMock() + # Ensure it doesn't match any isinstance checks + mock_model.__class__ = type("UnknownModel", (), {}) + with pytest.raises(ValueError, match="Unknown model type"): + get_embedding_layer(mock_model) + + def test_get_embedding_matrix_raises_for_unknown_model(self) -> None: + mock_model = MagicMock() + mock_model.__class__ = type("UnknownModel", (), {}) + with pytest.raises(ValueError, match="Unknown model type"): + get_embedding_matrix(mock_model) + + def test_get_embeddings_raises_for_unknown_model(self) -> None: + mock_model = MagicMock() + mock_model.__class__ = type("UnknownModel", (), {}) + with pytest.raises(ValueError, match="Unknown model type"): + get_embeddings(mock_model, torch.tensor([1, 2, 3])) + + +class TestPromptManagerInit: + """Tests for PromptManager initialization validation.""" + + def test_raises_on_mismatched_goals_targets(self) -> None: + PromptManager = attack_manager_mod.PromptManager + with pytest.raises(ValueError, match="Length of goals and targets must match"): + PromptManager( + goals=["goal1", "goal2"], + targets=["target1"], + tokenizer=MagicMock(), + conv_template=MagicMock(), + managers={"AP": MagicMock()}, + ) + + def test_raises_on_empty_goals(self) -> None: + PromptManager = attack_manager_mod.PromptManager + with pytest.raises(ValueError, match="Must provide at least one goal"): + PromptManager( + goals=[], + targets=[], + tokenizer=MagicMock(), + conv_template=MagicMock(), + managers={"AP": MagicMock()}, + ) + + +class TestEvaluateAttackInit: + """Tests for EvaluateAttack initialization validation.""" + + def test_raises_with_multiple_workers(self) -> None: + EvaluateAttack = attack_manager_mod.EvaluateAttack + mock_worker1 = MagicMock() + mock_worker1.model.name_or_path = "m1" + mock_worker1.tokenizer.name_or_path = "t1" + mock_worker1.conv_template.name = "c1" + mock_worker2 = MagicMock() + mock_worker2.model.name_or_path = "m2" + mock_worker2.tokenizer.name_or_path = "t2" + mock_worker2.conv_template.name = "c2" + + with pytest.raises(ValueError, match="exactly 1 worker"): + EvaluateAttack( + goals=["goal"], + targets=["target"], + workers=[mock_worker1, mock_worker2], + managers={"AP": MagicMock(), "PM": MagicMock(), "MPA": MagicMock()}, + ) From 9a2e7fc03fb53caf2996620a9071985b7cffeb44 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Mon, 4 May 2026 05:05:19 -0700 Subject: [PATCH 02/24] test: add data/config and lifecycle tests for GCG Data & config tests (test_data_and_config.py, 12 tests): - YAML loading: valid files, list values, missing file error - Real config validation: all 11 shipped configs parse, have 
required keys, individual vs transfer configs have correct settings - get_goals_and_targets: seed reproducibility, different seeds differ, separate test data files, n_train_data limiting - run_trainer validation: unsupported model names, missing HF token Lifecycle tests (test_lifecycle.py, 7 tests): - GPU memory: nvidia-smi parsing (single/multi GPU), MLflow logging, failure handling - generate_suffix lifecycle: MLflow started before training, workers stopped after training, BUG CHARACTERIZATION: workers NOT stopped on failure (leak) Total GCG test count: 24 -> 69 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gcg/test_data_and_config.py | 245 ++++++++++++++++++ .../auxiliary_attacks/gcg/test_lifecycle.py | 201 ++++++++++++++ 2 files changed, 446 insertions(+) create mode 100644 tests/unit/auxiliary_attacks/gcg/test_data_and_config.py create mode 100644 tests/unit/auxiliary_attacks/gcg/test_lifecycle.py diff --git a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py new file mode 100644 index 0000000000..2777bb1eea --- /dev/null +++ b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py @@ -0,0 +1,245 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import os +import tempfile +from unittest.mock import MagicMock, patch + +import pytest + +attack_manager_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.base.attack_manager", + reason="GCG optional dependencies (torch, mlflow, etc.) not installed", +) +get_goals_and_targets = attack_manager_mod.get_goals_and_targets + +run_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.run", + reason="GCG run module not available", +) +_load_yaml_to_dict = run_mod._load_yaml_to_dict +run_trainer = run_mod.run_trainer + +CONFIGS_DIR = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "..", + "..", + "pyrit", + "auxiliary_attacks", + "gcg", + "experiments", + "configs", +) + + +class TestLoadYamlToDict: + """Tests for YAML config loading.""" + + def test_loads_valid_yaml(self) -> None: + """Should parse a valid YAML file into a dict.""" + content = "n_steps: 100\nbatch_size: 256\ntransfer: False\n" + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write(content) + path = f.name + + try: + result = _load_yaml_to_dict(path) + assert result == {"n_steps": 100, "batch_size": 256, "transfer": False} + finally: + os.unlink(path) + + def test_loads_list_values(self) -> None: + """Should handle YAML list values correctly.""" + content = 'model_paths: ["model/a", "model/b"]\ndevices: ["cuda:0", "cuda:1"]\n' + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write(content) + path = f.name + + try: + result = _load_yaml_to_dict(path) + assert result["model_paths"] == ["model/a", "model/b"] + assert result["devices"] == ["cuda:0", "cuda:1"] + finally: + os.unlink(path) + + def test_raises_on_missing_file(self) -> None: + """Should raise FileNotFoundError for nonexistent config.""" + with pytest.raises(FileNotFoundError): + _load_yaml_to_dict("/nonexistent/config.yaml") + + +class TestRealConfigFiles: + """Tests that the shipped YAML config files parse correctly and have expected keys.""" + + @pytest.fixture() + def config_files(self) -> list[str]: + """Return list of all YAML config files shipped with GCG.""" + configs_dir = os.path.normpath(CONFIGS_DIR) + if not os.path.isdir(configs_dir): + pytest.skip(f"Config directory not 
found: {configs_dir}") + return [os.path.join(configs_dir, f) for f in os.listdir(configs_dir) if f.endswith(".yaml")] + + def test_all_configs_parse_without_error(self, config_files: list[str]) -> None: + """Every shipped YAML config should parse into a non-empty dict.""" + assert len(config_files) > 0, "No config files found" + for path in config_files: + result = _load_yaml_to_dict(path) + assert isinstance(result, dict), f"{path} did not parse to dict" + assert len(result) > 0, f"{path} parsed to empty dict" + + def test_all_configs_have_required_keys(self, config_files: list[str]) -> None: + """Every config should have the minimum required keys for GCG.""" + required_keys = { + "tokenizer_paths", + "model_paths", + "conversation_templates", + "devices", + } + for path in config_files: + config = _load_yaml_to_dict(path) + missing = required_keys - set(config.keys()) + assert not missing, f"{os.path.basename(path)} missing keys: {missing}" + + def test_individual_vs_transfer_configs_differ(self, config_files: list[str]) -> None: + """Individual configs should have transfer=False, transfer configs transfer=True.""" + for path in config_files: + config = _load_yaml_to_dict(path) + basename = os.path.basename(path) + if basename.startswith("individual_"): + assert config.get("transfer") is False, f"{basename} should have transfer=False" + elif basename.startswith("transfer_"): + assert config.get("transfer") is True or config.get("progressive_goals") is True, ( + f"{basename} should use transfer or progressive_goals" + ) + + +class TestGetGoalsAndTargetsAdditional: + """Additional tests for get_goals_and_targets beyond the existing file.""" + + def test_shuffle_is_reproducible_with_same_seed(self) -> None: + """Same random_seed should produce the same goal/target ordering.""" + csv_content = "goal,target\n" + "\n".join(f"goal{i},target{i}" for i in range(20)) + "\n" + with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: + f.write(csv_content) + csv_path = f.name + + try: + params1 = MagicMock() + params1.train_data = csv_path + params1.n_train_data = 10 + params1.n_test_data = 0 + params1.test_data = "" + params1.random_seed = 42 + + params2 = MagicMock() + params2.train_data = csv_path + params2.n_train_data = 10 + params2.n_test_data = 0 + params2.test_data = "" + params2.random_seed = 42 + + goals1, targets1, _, _ = get_goals_and_targets(params1) + goals2, targets2, _, _ = get_goals_and_targets(params2) + + assert goals1 == goals2 + assert targets1 == targets2 + finally: + os.unlink(csv_path) + + def test_different_seeds_produce_different_ordering(self) -> None: + """Different seeds should (almost certainly) produce different orderings.""" + csv_content = "goal,target\n" + "\n".join(f"goal{i},target{i}" for i in range(50)) + "\n" + with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: + f.write(csv_content) + csv_path = f.name + + try: + params1 = MagicMock() + params1.train_data = csv_path + params1.n_train_data = 50 + params1.n_test_data = 0 + params1.test_data = "" + params1.random_seed = 42 + + params2 = MagicMock() + params2.train_data = csv_path + params2.n_train_data = 50 + params2.n_test_data = 0 + params2.test_data = "" + params2.random_seed = 99 + + goals1, _, _, _ = get_goals_and_targets(params1) + goals2, _, _, _ = get_goals_and_targets(params2) + + assert goals1 != goals2, "Different seeds should produce different orderings" + finally: + os.unlink(csv_path) + + def test_separate_test_data_file(self) -> None: + """Should load 
test data from a separate CSV file when provided."""
        train_csv = "goal,target\ntrain_goal1,train_target1\ntrain_goal2,train_target2\n"
        test_csv = "goal,target\ntest_goal1,test_target1\n"

        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
            f.write(train_csv)
            train_path = f.name
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
            f.write(test_csv)
            test_path = f.name

        try:
            params = MagicMock()
            params.train_data = train_path
            params.n_train_data = 2
            params.n_test_data = 1
            params.test_data = test_path
            params.random_seed = 42

            train_goals, train_targets, test_goals, test_targets = get_goals_and_targets(params)
            assert len(train_goals) == 2
            assert len(test_goals) == 1
            assert test_goals[0] == "test_goal1"
            assert test_targets[0] == "test_target1"
        finally:
            os.unlink(train_path)
            os.unlink(test_path)

    def test_n_train_data_limits_output(self) -> None:
        """n_train_data should cap the number of returned training examples."""
        csv_content = "goal,target\n" + "\n".join(f"goal{i},target{i}" for i in range(100)) + "\n"
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
            f.write(csv_content)
            csv_path = f.name

        try:
            params = MagicMock()
            params.train_data = csv_path
            params.n_train_data = 5
            params.n_test_data = 0
            params.test_data = ""
            params.random_seed = 42

            goals, targets, _, _ = get_goals_and_targets(params)
            assert len(goals) == 5
            assert len(targets) == 5
        finally:
            os.unlink(csv_path)


class TestRunTrainerValidation:
    """Tests for run_trainer input validation (no actual model loading)."""

    def test_raises_on_unsupported_model_name(self) -> None:
        """Should raise ValueError for unsupported model names."""
        with pytest.raises(ValueError, match="Model name not supported"):
            run_trainer(model_name="nonexistent_model")

    @patch.dict("os.environ", {"HUGGINGFACE_TOKEN": ""}, clear=False)
    @patch("pyrit.auxiliary_attacks.gcg.experiments.run._load_environment_files")
    def test_raises_without_hf_token(self, mock_load_env: MagicMock) -> None:
        """Should raise ValueError when HUGGINGFACE_TOKEN is not set."""
        # The patch.dict decorator above already blanks HUGGINGFACE_TOKEN.
        with pytest.raises(ValueError, match="HUGGINGFACE_TOKEN"):
            run_trainer(model_name="phi_3_mini")
diff --git a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py
new file mode 100644
index 0000000000..7792c98224
--- /dev/null
+++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py
@@ -0,0 +1,201 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import subprocess
from unittest.mock import MagicMock, patch

import pytest

log_mod = pytest.importorskip(
    "pyrit.auxiliary_attacks.gcg.experiments.log",
    reason="GCG optional dependencies (mlflow, etc.) 
not installed", +) +log_gpu_memory = log_mod.log_gpu_memory +get_gpu_memory = log_mod.get_gpu_memory + +train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", +) +Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + + +class TestGpuMemoryLogging: + """Tests for GPU memory query and logging.""" + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_parses_nvidia_smi(self, mock_sp: MagicMock) -> None: + """Should parse nvidia-smi output into a dict of GPU -> free memory.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 8000, "gpu2_free_memory": 16000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: + """Should handle single GPU output.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n24000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 24000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_log_gpu_memory_logs_to_mlflow(self, mock_sp: MagicMock, mock_mlflow: MagicMock) -> None: + """Should log each GPU's free memory as an MLflow metric.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + log_gpu_memory(step=5) + + assert mock_mlflow.log_metric.call_count == 2 + calls = mock_mlflow.log_metric.call_args_list + assert calls[0].args == ("gpu1_free_memory", 8000) + assert calls[0].kwargs["step"] == 5 + assert calls[1].args == ("gpu2_free_memory", 16000) + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: + """Should propagate exception when nvidia-smi is not available.""" + mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") + with pytest.raises(subprocess.CalledProcessError): + get_gpu_memory() + + +class TestGenerateSuffixLifecycle: + """Tests for generate_suffix MLflow and worker lifecycle management.""" + + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") + def test_mlflow_run_started_before_training( + self, + mock_attack_lib: MagicMock, + mock_mlflow: MagicMock, + mock_log_train_goals: MagicMock, + mock_log_params: MagicMock, + mock_log_gpu_memory: MagicMock, + mock_get_goals: MagicMock, + mock_get_workers: MagicMock, + ) -> None: + """MLflow run should be started before any training begins.""" + mock_get_goals.return_value = (["goal1"], ["target1"], [], []) + mock_worker = MagicMock() + mock_worker.model.name_or_path = "test-model" + mock_worker.tokenizer.name_or_path = "test-tokenizer" + mock_worker.conv_template.name = "test-template" + mock_get_workers.return_value = ([mock_worker], []) + + mock_attack_instance = MagicMock() + mock_attack_lib.GCGAttackPrompt = MagicMock + mock_attack_lib.GCGPromptManager = MagicMock + mock_attack_lib.GCGMultiPromptAttack 
= MagicMock + + # Patch _create_attack to avoid IndividualPromptAttack's logfile writing + with patch.object(Generator, "_create_attack", return_value=mock_attack_instance): + generator = Generator.__new__(Generator) + generator.generate_suffix( + tokenizer_paths=["test/path"], + model_paths=["test/path"], + conversation_templates=["llama-2"], + train_data="", + n_steps=1, + ) + + mock_mlflow.start_run.assert_called_once() + + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") + def test_workers_stopped_after_training( + self, + mock_attack_lib: MagicMock, + mock_mlflow: MagicMock, + mock_log_train_goals: MagicMock, + mock_log_params: MagicMock, + mock_log_gpu_memory: MagicMock, + mock_get_goals: MagicMock, + mock_get_workers: MagicMock, + ) -> None: + """All workers should be stopped after training completes.""" + mock_get_goals.return_value = (["goal1"], ["target1"], [], []) + mock_worker1 = MagicMock() + mock_worker1.model.name_or_path = "test-model-1" + mock_worker1.tokenizer.name_or_path = "test-tokenizer-1" + mock_worker1.conv_template.name = "test-template-1" + mock_worker2 = MagicMock() + mock_worker2.model.name_or_path = "test-model-2" + mock_worker2.tokenizer.name_or_path = "test-tokenizer-2" + mock_worker2.conv_template.name = "test-template-2" + mock_get_workers.return_value = ([mock_worker1], [mock_worker2]) + + mock_attack_instance = MagicMock() + mock_attack_lib.GCGAttackPrompt = MagicMock + mock_attack_lib.GCGPromptManager = MagicMock + mock_attack_lib.GCGMultiPromptAttack = MagicMock + + with patch.object(Generator, "_create_attack", return_value=mock_attack_instance): + generator = Generator.__new__(Generator) + generator.generate_suffix( + tokenizer_paths=["test/path"], + model_paths=["test/path"], + conversation_templates=["llama-2"], + train_data="", + n_steps=1, + ) + + mock_worker1.stop.assert_called_once() + mock_worker2.stop.assert_called_once() + + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") + @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") + def test_workers_not_stopped_on_training_failure( + self, + mock_attack_lib: MagicMock, + mock_mlflow: MagicMock, + mock_log_train_goals: MagicMock, + mock_log_params: MagicMock, + mock_log_gpu_memory: MagicMock, + mock_get_goals: MagicMock, + mock_get_workers: MagicMock, + ) -> None: + """BUG CHARACTERIZATION: Workers are NOT stopped when attack.run() raises. + + This documents the current (buggy) behavior — workers leak on failure. + A future fix should ensure workers are cleaned up even on exceptions. 
+ """ + mock_get_goals.return_value = (["goal1"], ["target1"], [], []) + mock_worker = MagicMock() + mock_worker.model.name_or_path = "test-model" + mock_worker.tokenizer.name_or_path = "test-tokenizer" + mock_worker.conv_template.name = "test-template" + mock_get_workers.return_value = ([mock_worker], []) + + mock_attack_instance = MagicMock() + mock_attack_instance.run.side_effect = RuntimeError("Simulated failure") + mock_attack_lib.GCGAttackPrompt = MagicMock + mock_attack_lib.GCGPromptManager = MagicMock + mock_attack_lib.GCGMultiPromptAttack = MagicMock + + with patch.object(Generator, "_create_attack", return_value=mock_attack_instance): + generator = Generator.__new__(Generator) + with pytest.raises(RuntimeError, match="Simulated failure"): + generator.generate_suffix( + tokenizer_paths=["test/path"], + model_paths=["test/path"], + conversation_templates=["llama-2"], + train_data="", + n_steps=1, + ) + + # Workers are NOT stopped on failure — this is a bug we'll fix later + mock_worker.stop.assert_not_called() From 612c66f8289a9ecc7a9c1876b11decf80074ba79 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Mon, 4 May 2026 05:20:43 -0700 Subject: [PATCH 03/24] test: add GCG integration tests with real GPT-2 model Add 10 integration tests that exercise the GCG attack pipeline with a real GPT-2 model on CPU, validating end-to-end correctness: - token_gradients: gradient shape matches (n_control, vocab_size), values are finite and non-zero - GCGAttackPrompt: initializes with valid non-overlapping slices, grad() returns correct shape, test_loss() returns finite positive float - GCGPromptManager.sample_control: sampled candidates are decodable, correct batch size - Embedding helpers: layer/matrix/embeddings work with GPT2LMHeadModel, get_nonascii_toks returns non-empty tensor Uses llama-2 conversation template (has explicit handling in _update_ids). Marked @run_only_if_all_tests (requires RUN_ALL_TESTS=true + torch/transformers). Runs in ~18s on CPU. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/test_gcg_integration.py | 226 ++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 tests/integration/auxiliary_attacks/test_gcg_integration.py diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py new file mode 100644 index 0000000000..d611e13fc9 --- /dev/null +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -0,0 +1,226 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Integration tests for GCG attack using a real GPT-2 model on CPU. + +These tests validate that the GCG attack pipeline works end-to-end with a real +(tiny) model. They use GPT-2 (~124M params) which can run on CPU, paired with +the llama-2 conversation template (which has explicit handling in _update_ids). + +Requires: torch, transformers, fastchat, mlflow (GCG optional deps). +Skipped unless RUN_ALL_TESTS=true. 
+""" + +import pytest + +torch = pytest.importorskip("torch", reason="torch not installed") +transformers = pytest.importorskip("transformers", reason="transformers not installed") +pytest.importorskip("fastchat", reason="fastchat not installed") + +from unittest.mock import MagicMock, patch + +from fastchat.model import get_conversation_template +from transformers import AutoTokenizer, GPT2LMHeadModel + +from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( + MultiPromptAttack, + get_embedding_layer, + get_embedding_matrix, + get_embeddings, + get_nonascii_toks, +) +from pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack import ( + GCGAttackPrompt, + GCGMultiPromptAttack, + GCGPromptManager, + token_gradients, +) + + +@pytest.fixture(scope="module") +def gpt2_model() -> GPT2LMHeadModel: + """Load GPT-2 model once for all tests in this module.""" + model = GPT2LMHeadModel.from_pretrained("gpt2").eval() + return model + + +@pytest.fixture(scope="module") +def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: + """Load GPT-2 tokenizer once for all tests in this module.""" + tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer.pad_token = tokenizer.eos_token + tokenizer.padding_side = "left" + return tokenizer + + +@pytest.fixture() +def conv_template(): + """Create a fresh llama-2 conversation template for each test.""" + conv = get_conversation_template("llama-2") + conv.sep2 = conv.sep2.strip() + return conv + + +@pytest.mark.run_only_if_all_tests +class TestTokenGradientsIntegration: + """Integration tests for token_gradients with real GPT-2.""" + + def test_gradient_shape_matches_control_and_vocab( + self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer + ) -> None: + """Gradient should have shape (n_control_tokens, vocab_size).""" + input_ids = gpt2_tokenizer("Hello world ! ! ! target text", return_tensors="pt")["input_ids"][0] + control_slice = slice(2, 5) + target_slice = slice(5, 7) + loss_slice = slice(4, 6) + + grad = token_gradients(gpt2_model, input_ids, control_slice, target_slice, loss_slice) + + n_control = control_slice.stop - control_slice.start + assert grad.shape == (n_control, gpt2_tokenizer.vocab_size) + + def test_gradient_is_finite_and_nonzero( + self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer + ) -> None: + """Gradient values should be finite and at least some should be non-zero.""" + input_ids = gpt2_tokenizer("Tell me how ! ! ! Sure here is", return_tensors="pt")["input_ids"][0] + control_slice = slice(3, 6) + target_slice = slice(6, 9) + loss_slice = slice(5, 8) + + grad = token_gradients(gpt2_model, input_ids, control_slice, target_slice, loss_slice) + + assert torch.isfinite(grad).all(), "Gradient contains non-finite values" + assert (grad != 0).any(), "Gradient is all zeros" + + +@pytest.mark.run_only_if_all_tests +class TestGCGAttackPromptIntegration: + """Integration tests for GCGAttackPrompt with real GPT-2.""" + + def test_prompt_initializes_with_valid_slices( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + conv_template: object, + ) -> None: + """AttackPrompt should initialize with non-empty, non-overlapping slices.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=conv_template, + control_init="! ! ! ! 
!", + ) + + assert prompt._control_slice.start < prompt._control_slice.stop + assert prompt._target_slice.start < prompt._target_slice.stop + assert prompt._control_slice.stop <= prompt._target_slice.start + assert prompt.input_ids.shape[0] > 0 + + def test_grad_returns_valid_gradient( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + conv_template: object, + ) -> None: + """GCGAttackPrompt.grad should return a finite, non-zero gradient tensor.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=conv_template, + control_init="! ! ! ! !", + ) + + grad = prompt.grad(gpt2_model) + + n_control = prompt._control_slice.stop - prompt._control_slice.start + assert grad.shape[0] == n_control + assert grad.shape[1] == gpt2_tokenizer.vocab_size + assert torch.isfinite(grad).all() + + def test_target_loss_is_finite_scalar( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + conv_template: object, + ) -> None: + """Target loss from real model logits should be a finite positive number.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=conv_template, + control_init="! ! ! ! !", + ) + + loss = prompt.test_loss(gpt2_model) + assert isinstance(loss, float) + assert loss > 0 + assert loss < 1e6 + + +@pytest.mark.run_only_if_all_tests +class TestGCGSampleControlIntegration: + """Integration tests for GCGPromptManager.sample_control with real tokenizer.""" + + def test_sample_control_produces_valid_candidates( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + conv_template: object, + ) -> None: + """Sampled control tokens should be decodable by the tokenizer.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=conv_template, + control_init="! ! ! ! 
!", + ) + + grad = prompt.grad(gpt2_model) + + pm = object.__new__(GCGPromptManager) + pm._prompts = [prompt] + pm._nonascii_toks = get_nonascii_toks(gpt2_tokenizer, device="cpu") + + candidates = pm.sample_control(grad, batch_size=8, topk=32, allow_non_ascii=False) + + assert candidates.shape[0] == 8 + # All candidates should be decodable without error + for i in range(candidates.shape[0]): + decoded = gpt2_tokenizer.decode(candidates[i]) + assert isinstance(decoded, str) + assert len(decoded) > 0 + + +@pytest.mark.run_only_if_all_tests +class TestEmbeddingHelpersIntegration: + """Integration tests for embedding helper functions with real GPT-2.""" + + def test_get_embedding_layer_returns_embedding(self, gpt2_model: GPT2LMHeadModel) -> None: + layer = get_embedding_layer(gpt2_model) + assert isinstance(layer, torch.nn.Embedding) + + def test_get_embedding_matrix_shape( + self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer + ) -> None: + matrix = get_embedding_matrix(gpt2_model) + assert matrix.shape[0] == gpt2_tokenizer.vocab_size + + def test_get_embeddings_returns_correct_shape( + self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer + ) -> None: + input_ids = gpt2_tokenizer("Hello world", return_tensors="pt")["input_ids"] + embeddings = get_embeddings(gpt2_model, input_ids) + assert embeddings.shape[0] == 1 + assert embeddings.shape[1] == input_ids.shape[1] + + def test_get_nonascii_toks_returns_nonempty_tensor( + self, gpt2_tokenizer: transformers.PreTrainedTokenizer + ) -> None: + toks = get_nonascii_toks(gpt2_tokenizer, device="cpu") + assert isinstance(toks, torch.Tensor) + assert len(toks) > 0 From df31760a12ba8d1205774a7f35251bc6c16d92a2 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Mon, 4 May 2026 05:36:57 -0700 Subject: [PATCH 04/24] TEST: remove run_only_if_all_tests marker from GCG integration tests These tests only need optional Python packages (torch, transformers, fastchat), not external services or credentials. The importorskip at the top already handles skipping when deps are not installed. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/integration/auxiliary_attacks/test_gcg_integration.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index d611e13fc9..00764b8307 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -61,7 +61,6 @@ def conv_template(): return conv -@pytest.mark.run_only_if_all_tests class TestTokenGradientsIntegration: """Integration tests for token_gradients with real GPT-2.""" @@ -94,7 +93,6 @@ def test_gradient_is_finite_and_nonzero( assert (grad != 0).any(), "Gradient is all zeros" -@pytest.mark.run_only_if_all_tests class TestGCGAttackPromptIntegration: """Integration tests for GCGAttackPrompt with real GPT-2.""" @@ -161,7 +159,6 @@ def test_target_loss_is_finite_scalar( assert loss < 1e6 -@pytest.mark.run_only_if_all_tests class TestGCGSampleControlIntegration: """Integration tests for GCGPromptManager.sample_control with real tokenizer.""" @@ -196,7 +193,6 @@ def test_sample_control_produces_valid_candidates( assert len(decoded) > 0 -@pytest.mark.run_only_if_all_tests class TestEmbeddingHelpersIntegration: """Integration tests for embedding helper functions with real GPT-2.""" From fb5cb70143ccffabfba533bb9f9c9015c52e89a0 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Mon, 4 May 2026 06:34:27 -0700 Subject: [PATCH 05/24] MAINT: fix pre-commit lint issues in GCG tests - Move class references to module level to fix N806 (variable naming) - Add noqa: E402 for imports after importorskip guards - Fix ruff format issues - Remove outdated RUN_ALL_TESTS reference in docstring Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/test_gcg_integration.py | 20 ++---- .../auxiliary_attacks/gcg/test_gcg_core.py | 65 +++++++++---------- 2 files changed, 36 insertions(+), 49 deletions(-) diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index 00764b8307..d2e32d5974 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -8,7 +8,7 @@ the llama-2 conversation template (which has explicit handling in _update_ids). Requires: torch, transformers, fastchat, mlflow (GCG optional deps). -Skipped unless RUN_ALL_TESTS=true. +Skipped via importorskip when deps are not installed. 
""" import pytest @@ -17,21 +17,18 @@ transformers = pytest.importorskip("transformers", reason="transformers not installed") pytest.importorskip("fastchat", reason="fastchat not installed") -from unittest.mock import MagicMock, patch -from fastchat.model import get_conversation_template -from transformers import AutoTokenizer, GPT2LMHeadModel +from fastchat.model import get_conversation_template # noqa: E402 +from transformers import AutoTokenizer, GPT2LMHeadModel # noqa: E402 -from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( - MultiPromptAttack, +from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( # noqa: E402 get_embedding_layer, get_embedding_matrix, get_embeddings, get_nonascii_toks, ) -from pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack import ( +from pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack import ( # noqa: E402 GCGAttackPrompt, - GCGMultiPromptAttack, GCGPromptManager, token_gradients, ) @@ -40,8 +37,7 @@ @pytest.fixture(scope="module") def gpt2_model() -> GPT2LMHeadModel: """Load GPT-2 model once for all tests in this module.""" - model = GPT2LMHeadModel.from_pretrained("gpt2").eval() - return model + return GPT2LMHeadModel.from_pretrained("gpt2").eval() @pytest.fixture(scope="module") @@ -214,9 +210,7 @@ def test_get_embeddings_returns_correct_shape( assert embeddings.shape[0] == 1 assert embeddings.shape[1] == input_ids.shape[1] - def test_get_nonascii_toks_returns_nonempty_tensor( - self, gpt2_tokenizer: transformers.PreTrainedTokenizer - ) -> None: + def test_get_nonascii_toks_returns_nonempty_tensor(self, gpt2_tokenizer: transformers.PreTrainedTokenizer) -> None: toks = get_nonascii_toks(gpt2_tokenizer, device="cpu") assert isinstance(toks, torch.Tensor) assert len(toks) > 0 diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py index d21074e1c2..e1568e1a0f 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import numpy as np
 import pytest
@@ -13,6 +13,11 @@
 torch = pytest.importorskip("torch", reason="torch not installed")
 
 MultiPromptAttack = attack_manager_mod.MultiPromptAttack
+AttackPrompt = attack_manager_mod.AttackPrompt
+PromptManager = attack_manager_mod.PromptManager
+EvaluateAttack = attack_manager_mod.EvaluateAttack
+IndividualPromptAttack = attack_manager_mod.IndividualPromptAttack
+ProgressiveMultiPromptAttack = attack_manager_mod.ProgressiveMultiPromptAttack
 get_embedding_layer = attack_manager_mod.get_embedding_layer
 get_embedding_matrix = attack_manager_mod.get_embedding_matrix
 get_embeddings = attack_manager_mod.get_embeddings
@@ -113,7 +118,6 @@ class TestTargetAndControlLoss:
 
     def test_target_loss_returns_correct_shape(self) -> None:
         """target_loss should return tensor of shape (batch, target_len)."""
-        AttackPrompt = attack_manager_mod.AttackPrompt
         prompt = object.__new__(AttackPrompt)
         prompt._target_slice = slice(5, 8)  # 3 target tokens
 
@@ -128,7 +132,6 @@ def test_target_loss_returns_correct_shape(self) -> None:
 
     def test_target_loss_is_finite(self) -> None:
         """target_loss should always return finite values."""
-        AttackPrompt = attack_manager_mod.AttackPrompt
         prompt = object.__new__(AttackPrompt)
         prompt._target_slice = slice(3, 6)
 
@@ -140,7 +143,6 @@ def test_target_loss_is_finite(self) -> None:
 
     def test_control_loss_returns_correct_shape(self) -> None:
         """control_loss should return tensor of shape (batch, control_len)."""
-        AttackPrompt = attack_manager_mod.AttackPrompt
         prompt = object.__new__(AttackPrompt)
         prompt._control_slice = slice(2, 5)  # 3 control tokens
 
@@ -155,7 +157,6 @@ def test_control_loss_returns_correct_shape(self) -> None:
 
     def test_control_loss_is_finite(self) -> None:
         """control_loss should always return finite values."""
-        AttackPrompt = attack_manager_mod.AttackPrompt
         prompt = object.__new__(AttackPrompt)
         prompt._control_slice = slice(2, 5)
 
@@ -167,7 +168,6 @@ def test_control_loss_is_finite(self) -> None:
 
     def test_target_loss_higher_for_wrong_predictions(self) -> None:
         """Loss should be higher when logits don't predict the correct target tokens."""
-        AttackPrompt = attack_manager_mod.AttackPrompt
         prompt = object.__new__(AttackPrompt)
         prompt._target_slice = slice(3, 5)
 
@@ -268,26 +268,24 @@ def test_non_ascii_filtering(self) -> None:
 
         for i in range(batch_size):
             # Find the position that changed
-            diffs = (result[i] != original.to(result.device))
+            diffs = result[i] != original.to(result.device)
             changed_positions = diffs.nonzero(as_tuple=True)[0]
             for pos in changed_positions:
                 new_tok = result[i, pos].item()
-                assert new_tok not in non_ascii_set, (
-                    f"Candidate {i} position {pos}: sampled non-ASCII token {new_tok}"
-                )
+                assert new_tok not in non_ascii_set, f"Candidate {i} position {pos}: sampled non-ASCII token {new_tok}"
 
 
 class TestBuildParams:
     """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._build_params."""
 
     def test_builds_config_dict_from_kwargs(self) -> None:
         train_mod = pytest.importorskip(
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+        generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
 
-        params = Generator._build_params(
+        params = generator_cls._build_params(
             n_steps=100,
             batch_size=256,
             model_name="test_model",
@@ -301,28 +299,28 @@ def test_all_kwargs_become_attributes(self) -> None:
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+        generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
 
         kwargs = {"a": 1, "b": "hello", "c": [1, 2, 3], "d": True}
-        params = Generator._build_params(**kwargs)
+        params = generator_cls._build_params(**kwargs)
         for key, value in kwargs.items():
             assert getattr(params, key) == value
 
 
 class TestApplyTargetAugmentation:
     """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._apply_target_augmentation."""
 
     def test_returns_same_length_lists(self) -> None:
         train_mod = pytest.importorskip(
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+        generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
 
         train = ["Sure, here is a bomb", "Sure, here is a virus"]
         test = ["Sure, here is a weapon"]
 
-        result_train, result_test = Generator._apply_target_augmentation(
+        result_train, result_test = generator_cls._apply_target_augmentation(
             train_targets=train,
             test_targets=test,
         )
@@ -335,17 +333,17 @@ def test_augmentation_modifies_targets(self) -> None:
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+        generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
 
         np.random.seed(42)
         targets = ["Sure, here is how to do it"] * 100
 
-        result, _ = Generator._apply_target_augmentation(
+        result, _ = generator_cls._apply_target_augmentation(
             train_targets=targets,
             test_targets=[],
         )
         # With 100 targets and 50% chance of each transform, we should see some changes
-        num_changed = sum(1 for orig, aug in zip(targets, result) if orig != aug)
+        num_changed = sum(1 for orig, aug in zip(targets, result, strict=False) if orig != aug)
         assert num_changed > 0, "Expected at least some targets to be augmented"
 
     def test_augmentation_is_seeded_reproducible(self) -> None:
@@ -354,31 +352,30 @@ def test_augmentation_is_seeded_reproducible(self) -> None:
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+        generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
 
         targets = ["Sure, here is how to do it"] * 20
 
         np.random.seed(123)
-        result1, _ = Generator._apply_target_augmentation(train_targets=targets, test_targets=[])
+        result1, _ = generator_cls._apply_target_augmentation(train_targets=targets, test_targets=[])
 
         np.random.seed(123)
-        result2, _ = Generator._apply_target_augmentation(train_targets=targets, test_targets=[])
+        result2, _ = generator_cls._apply_target_augmentation(train_targets=targets, test_targets=[])
 
         assert result1 == result2
 
 
 class TestCreateAttack:
     """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._create_attack."""
 
     def test_transfer_true_creates_progressive(self) -> None:
         train_mod = pytest.importorskip(
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
         )
-        Generator = 
train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator - ProgressiveMultiPromptAttack = attack_manager_mod.ProgressiveMultiPromptAttack + generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator - params = Generator._build_params( + params = generator_cls._build_params( transfer=True, progressive_models=True, progressive_goals=True, @@ -401,7 +398,7 @@ def test_transfer_true_creates_progressive(self) -> None: "MPA": MagicMock(return_value=MagicMock()), } - attack = Generator._create_attack( + attack = generator_cls._create_attack( params=params, managers=managers, train_goals=["goal1"], @@ -418,10 +415,9 @@ def test_transfer_false_creates_individual(self) -> None: "pyrit.auxiliary_attacks.gcg.experiments.train", reason="GCG train module not available", ) - Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator - IndividualPromptAttack = attack_manager_mod.IndividualPromptAttack + generator_cls = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator - params = Generator._build_params( + params = generator_cls._build_params( transfer=False, control_init="! ! !", result_prefix="test", @@ -442,7 +438,7 @@ def test_transfer_false_creates_individual(self) -> None: "MPA": MagicMock(return_value=MagicMock()), } - attack = Generator._create_attack( + attack = generator_cls._create_attack( params=params, managers=managers, train_goals=["goal1"], @@ -483,7 +479,6 @@ class TestPromptManagerInit: """Tests for PromptManager initialization validation.""" def test_raises_on_mismatched_goals_targets(self) -> None: - PromptManager = attack_manager_mod.PromptManager with pytest.raises(ValueError, match="Length of goals and targets must match"): PromptManager( goals=["goal1", "goal2"], @@ -494,7 +489,6 @@ def test_raises_on_mismatched_goals_targets(self) -> None: ) def test_raises_on_empty_goals(self) -> None: - PromptManager = attack_manager_mod.PromptManager with pytest.raises(ValueError, match="Must provide at least one goal"): PromptManager( goals=[], @@ -509,7 +503,6 @@ class TestEvaluateAttackInit: """Tests for EvaluateAttack initialization validation.""" def test_raises_with_multiple_workers(self) -> None: - EvaluateAttack = attack_manager_mod.EvaluateAttack mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "m1" mock_worker1.tokenizer.name_or_path = "t1" From c98af28254d64de054e35039d2523fba34b8b87e Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Tue, 5 May 2026 13:31:41 -0700 Subject: [PATCH 06/24] MAINT: remove mlflow dependency from GCG, fix Dockerfile Remove mlflow and azureml-mlflow from GCG dependencies entirely: - Replace mlflow logging in log.py with Python standard logging - Remove mlflow.start_run()/end_run() from train.py and attack_manager.py - Remove mlflow and azureml-mlflow from gcg and all extras in pyproject.toml - Update tests to not mock mlflow - Fix Dockerfile: use nvidia/cuda base + Python 3.11 + uv + pip install -e .[gcg] - Add pyarrow>=22 pin for Python 3.14 compatibility The mlflow dependency caused Azure ML failures due to version incompatibility between mlflow 3.x and azureml-mlflow. Proper experiment logging will be added later via CentralMemory or Azure storage (tracked in plan). 
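For reference, a minimal sketch of what the swap looks like for log_loss (the full change to pyrit/auxiliary_attacks/gcg/experiments/log.py is in the diff below):

    import logging

    logger = logging.getLogger(__name__)

    def log_loss(*, step: int, loss: float) -> None:
        # Previously: mlflow.log_metric("loss", loss, step=step, synchronous=False)
        logger.info(f"Step {step} loss: {loss}")

Call sites are unchanged, e.g. log_loss(step=5, loss=0.123); the synchronous flag is dropped since stdlib logging has no equivalent.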
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyproject.toml | 5 +- .../gcg/attack/base/attack_manager.py | 6 +- .../auxiliary_attacks/gcg/experiments/log.py | 49 +- .../gcg/experiments/train.py | 3 - pyrit/auxiliary_attacks/gcg/src/Dockerfile | 32 +- .../auxiliary_attacks/gcg/test_lifecycle.py | 61 +- tests/unit/auxiliary_attacks/gcg/test_log.py | 82 +- uv.lock | 896 ++++-------------- 8 files changed, 253 insertions(+), 881 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 601612ce5b..c35b562ae1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "confusable-homoglyphs>=3.3.1", "ecoji>=0.1.1", "datasets>=3.6.0", + "pyarrow>=22.0.0; python_version >= '3.14'", "fastapi>=0.115.0", "httpx[http2]>=0.27.2", "jinja2>=3.1.6", @@ -113,8 +114,6 @@ huggingface = [ gcg = [ "accelerate>=1.7.0", "azure-ai-ml>=1.27.1", - "azureml-mlflow>=1.60.0", - "mlflow>=3.11.1", "ml-collections>=1.1.0", "sentencepiece>=0.2.0", "torch>=2.7.0", @@ -141,11 +140,9 @@ all = [ "av>=14.0.0", "azure-ai-ml>=1.27.1", "azure-cognitiveservices-speech>=1.44.0", - "azureml-mlflow>=1.60.0", "flask>=3.1.3", "ipykernel>=6.29.5", "jupyter>=1.1.1", - "mlflow>=3.11.1", "ml-collections>=1.1.0", "ollama>=0.5.1", "opencv-python>=4.11.0.86", diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py index eae2663a4e..79bf568d09 100644 --- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py +++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py @@ -12,7 +12,6 @@ from copy import deepcopy from typing import Any, Optional -import mlflow import numpy as np import pandas as pd import torch @@ -902,14 +901,13 @@ def log( f"====================================================\n" ) - # Log to mlflow + # Log loss and GPU memory log_loss(step=step_num, loss=loss) log_gpu_memory(step=step_num) - # Log results table to mlflow + # Log results table at end of training if step_num == n_steps: log_table_summary(losses=log["losses"], controls=log["controls"], n_steps=n_steps) - mlflow.end_run() class ProgressiveMultiPromptAttack: diff --git a/pyrit/auxiliary_attacks/gcg/experiments/log.py b/pyrit/auxiliary_attacks/gcg/experiments/log.py index 1bbf65ac20..bdd96c1ca4 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/log.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/log.py @@ -3,11 +3,8 @@ import logging import subprocess as sp -import time from typing import Any, Optional -import mlflow - logger = logging.getLogger(__name__) _DEFAULT_PARAM_KEYS: list[str] = [ @@ -26,7 +23,7 @@ def log_params( param_keys: Optional[list[str]] = None, ) -> None: """ - Log selected parameters to MLflow. + Log selected parameters via Python logging. Args: params (Any): A config object with a `to_dict()` method containing all parameters. @@ -34,20 +31,18 @@ def log_params( """ if param_keys is None: param_keys = _DEFAULT_PARAM_KEYS - mlflow_params = {key: params.to_dict()[key] for key in param_keys} - mlflow.log_params(mlflow_params) + logged_params = {key: params.to_dict()[key] for key in param_keys} + logger.info(f"Training parameters: {logged_params}") def log_train_goals(*, train_goals: list[str]) -> None: """ - Log training goals as a text artifact to MLflow. + Log training goals via Python logging. Args: train_goals (list[str]): The list of training goal strings to log. 
""" - timestamp = time.strftime("%Y%m%d-%H%M%S") - train_goals_str = "\n".join(train_goals) - mlflow.log_text(train_goals_str, f"train_goals_{timestamp}.txt") + logger.info(f"Training goals ({len(train_goals)}): {train_goals}") def get_gpu_memory() -> dict[str, int]: @@ -65,46 +60,40 @@ def get_gpu_memory() -> dict[str, int]: return memory_free_values -def log_gpu_memory(*, step: int, synchronous: bool = False) -> None: +def log_gpu_memory(*, step: int) -> None: """ - Log free GPU memory metrics to MLflow. + Log free GPU memory via Python logging. Args: step (int): The current training step number. - synchronous (bool): Whether to log synchronously. Defaults to False. """ - memory_values = get_gpu_memory() - for gpu, val in memory_values.items(): - mlflow.log_metric(gpu, val, step=step, synchronous=synchronous) + try: + memory_values = get_gpu_memory() + logger.info(f"Step {step} GPU memory: {memory_values}") + except Exception: + logger.debug("Could not query GPU memory (nvidia-smi not available)") -def log_loss(*, step: int, loss: float, synchronous: bool = False) -> None: +def log_loss(*, step: int, loss: float) -> None: """ - Log training loss to MLflow. + Log training loss via Python logging. Args: step (int): The current training step number. loss (float): The loss value to log. - synchronous (bool): Whether to log synchronously. Defaults to False. """ - mlflow.log_metric("loss", loss, step=step, synchronous=synchronous) + logger.info(f"Step {step} loss: {loss}") def log_table_summary(*, losses: list[float], controls: list[str], n_steps: int) -> None: """ - Log a summary table of losses and controls to MLflow. + Log a summary of losses and controls via Python logging. Args: losses (list[float]): Loss values for each step. controls (list[str]): Control strings for each step. n_steps (int): Total number of steps. """ - timestamp = time.strftime("%Y%m%d-%H%M%S") - mlflow.log_table( - { - "step": [i + 1 for i in range(n_steps)], - "loss": losses, - "control": controls, - }, - artifact_file=f"gcg_results_{timestamp}.json", - ) + logger.info(f"Training complete ({n_steps} steps). 
Final loss: {losses[-1] if losses else 'N/A'}") + if controls: + logger.info(f"Final control: {controls[-1]}") diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py index 1eac86dc56..7adaace8fc 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/train.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py @@ -5,7 +5,6 @@ import time from typing import Any, Optional, Union -import mlflow import numpy as np import torch.multiprocessing as mp from ml_collections import config_dict @@ -166,8 +165,6 @@ def generate_suffix( ) logger.info(f"Parameters: {params}") - # Start mlflow logging - mlflow.start_run() log_gpu_memory(step=0) log_params(params=params) diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile index 37163d3365..674d4c4fa1 100644 --- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile +++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile @@ -1,19 +1,23 @@ -FROM mcr.microsoft.com/azureml/curated/acpt-pytorch-2.2-cuda12.1:37 +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 -RUN apt-get update +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.11 python3.11-venv python3.11-dev python3-pip \ + curl git build-essential && \ + rm -rf /var/lib/apt/lists/* -ENV UV_ENVIRONMENT_PATH /azureml-envs/pyrit +# Install uv +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:$PATH" -# Prepend path to AzureML environment -ENV PATH $UV_ENVIRONMENT_PATH/bin:$PATH +# Create venv +RUN uv venv /opt/venv --python 3.11 +ENV PATH="/opt/venv/bin:$PATH" +ENV VIRTUAL_ENV="/opt/venv" -# Create uv environment -RUN uv venv --python 3.11 && \ - uv pip cache purge +WORKDIR /app -RUN uv pip list - -RUN git clone https://github.com/microsoft/PyRIT.git - -RUN cd PyRIT && uv sync --extra gcg && uv pip install git+https://github.com/lm-sys/FastChat.git@2c68a13bfe10b86f40e3eefc3fcfacb32c00b02a -RUN uv pip freeze +# Copy and install PyRIT with GCG extras +COPY pyproject.toml MANIFEST.in README.md LICENSE /app/ +COPY pyrit/ /app/pyrit/ +RUN uv pip install -e ".[gcg]" && \ + uv pip install "fschat @ git+https://github.com/lm-sys/FastChat.git@2c68a13bfe10b86f40e3eefc3fcfacb32c00b02a" diff --git a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py index 7792c98224..99b820adec 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py +++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py @@ -37,19 +37,13 @@ def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: result = get_gpu_memory() assert result == {"gpu1_free_memory": 24000} - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_log_gpu_memory_logs_to_mlflow(self, mock_sp: MagicMock, mock_mlflow: MagicMock) -> None: - """Should log each GPU's free memory as an MLflow metric.""" + def test_log_gpu_memory_logs_via_logging(self, mock_sp: MagicMock) -> None: + """Should log GPU memory info without error.""" mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + # Should not raise log_gpu_memory(step=5) - assert mock_mlflow.log_metric.call_count == 2 - calls = mock_mlflow.log_metric.call_args_list - assert calls[0].args == ("gpu1_free_memory", 8000) - assert calls[0].kwargs["step"] == 5 - assert calls[1].args == ("gpu2_free_memory", 16000) - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") def test_get_gpu_memory_handles_nvidia_smi_failure(self, 
mock_sp: MagicMock) -> None: """Should propagate exception when nvidia-smi is not available.""" @@ -59,62 +53,17 @@ def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> class TestGenerateSuffixLifecycle: - """Tests for generate_suffix MLflow and worker lifecycle management.""" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") - def test_mlflow_run_started_before_training( - self, - mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, - mock_log_train_goals: MagicMock, - mock_log_params: MagicMock, - mock_log_gpu_memory: MagicMock, - mock_get_goals: MagicMock, - mock_get_workers: MagicMock, - ) -> None: - """MLflow run should be started before any training begins.""" - mock_get_goals.return_value = (["goal1"], ["target1"], [], []) - mock_worker = MagicMock() - mock_worker.model.name_or_path = "test-model" - mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" - mock_get_workers.return_value = ([mock_worker], []) - - mock_attack_instance = MagicMock() - mock_attack_lib.GCGAttackPrompt = MagicMock - mock_attack_lib.GCGPromptManager = MagicMock - mock_attack_lib.GCGMultiPromptAttack = MagicMock - - # Patch _create_attack to avoid IndividualPromptAttack's logfile writing - with patch.object(Generator, "_create_attack", return_value=mock_attack_instance): - generator = Generator.__new__(Generator) - generator.generate_suffix( - tokenizer_paths=["test/path"], - model_paths=["test/path"], - conversation_templates=["llama-2"], - train_data="", - n_steps=1, - ) - - mock_mlflow.start_run.assert_called_once() + """Tests for generate_suffix worker lifecycle management.""" @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") def test_workers_stopped_after_training( self, mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, mock_log_train_goals: MagicMock, mock_log_params: MagicMock, mock_log_gpu_memory: MagicMock, @@ -156,12 +105,10 @@ def test_workers_stopped_after_training( @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") def test_workers_not_stopped_on_training_failure( self, mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, mock_log_train_goals: MagicMock, mock_log_params: MagicMock, mock_log_gpu_memory: MagicMock, diff --git a/tests/unit/auxiliary_attacks/gcg/test_log.py b/tests/unit/auxiliary_attacks/gcg/test_log.py index e20b5a7c13..e30f96c460 100644 --- 
a/tests/unit/auxiliary_attacks/gcg/test_log.py +++ b/tests/unit/auxiliary_attacks/gcg/test_log.py @@ -1,13 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import pytest log_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.log", - reason="GCG optional dependencies (mlflow, etc.) not installed", + reason="GCG optional dependencies not installed", ) log_loss = log_mod.log_loss log_params = log_mod.log_params @@ -18,9 +18,8 @@ class TestLogParams: """Tests for the log_params function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_default_param_keys(self, mock_mlflow: MagicMock) -> None: - """Should log the default parameter keys to MLflow.""" + def test_logs_default_param_keys(self) -> None: + """Should extract default parameter keys without error.""" params = MagicMock() params.to_dict.return_value = { "model_name": "test_model", @@ -32,89 +31,48 @@ def test_logs_default_param_keys(self, mock_mlflow: MagicMock) -> None: "extra_param": "ignored", } + # Should not raise log_params(params=params) - mock_mlflow.log_params.assert_called_once() - logged_params = mock_mlflow.log_params.call_args[0][0] - assert logged_params == { - "model_name": "test_model", - "transfer": False, - "n_train_data": 50, - "n_test_data": 10, - "n_steps": 100, - "batch_size": 512, - } - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_custom_param_keys(self, mock_mlflow: MagicMock) -> None: - """Should log only the specified parameter keys.""" + def test_logs_custom_param_keys(self) -> None: + """Should accept custom parameter keys.""" params = MagicMock() params.to_dict.return_value = { "model_name": "test_model", "batch_size": 256, } + # Should not raise log_params(params=params, param_keys=["model_name", "batch_size"]) - logged_params = mock_mlflow.log_params.call_args[0][0] - assert logged_params == {"model_name": "test_model", "batch_size": 256} - class TestLogTrainGoals: """Tests for the log_train_goals function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_goals_as_text(self, mock_mlflow: MagicMock) -> None: - """Should log training goals joined by newlines.""" - goals = ["goal1", "goal2", "goal3"] - - log_train_goals(train_goals=goals) + def test_logs_goals(self) -> None: + """Should log training goals without error.""" + log_train_goals(train_goals=["goal1", "goal2", "goal3"]) - mock_mlflow.log_text.assert_called_once() - logged_text = mock_mlflow.log_text.call_args[0][0] - assert logged_text == "goal1\ngoal2\ngoal3" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_empty_goals(self, mock_mlflow: MagicMock) -> None: + def test_logs_empty_goals(self) -> None: """Should handle empty goals list.""" log_train_goals(train_goals=[]) - mock_mlflow.log_text.assert_called_once() - logged_text = mock_mlflow.log_text.call_args[0][0] - assert logged_text == "" - class TestLogLoss: """Tests for the log_loss function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_loss_metric(self, mock_mlflow: MagicMock) -> None: - """Should log loss as an MLflow metric.""" + def test_logs_loss(self) -> None: + """Should log loss without error.""" log_loss(step=5, loss=0.123) - mock_mlflow.log_metric.assert_called_once_with("loss", 0.123, step=5, synchronous=False) - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def 
test_logs_loss_synchronously(self, mock_mlflow: MagicMock) -> None: - """Should support synchronous logging.""" - log_loss(step=1, loss=0.5, synchronous=True) - - mock_mlflow.log_metric.assert_called_once_with("loss", 0.5, step=1, synchronous=True) - class TestLogTableSummary: """Tests for the log_table_summary function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_table_with_correct_data(self, mock_mlflow: MagicMock) -> None: - """Should log a table with step numbers, losses, and controls.""" - losses = [0.5, 0.3, 0.1] - controls = ["ctrl1", "ctrl2", "ctrl3"] - - log_table_summary(losses=losses, controls=controls, n_steps=3) + def test_logs_table_summary(self) -> None: + """Should log summary without error.""" + log_table_summary(losses=[0.5, 0.3, 0.1], controls=["ctrl1", "ctrl2", "ctrl3"], n_steps=3) - mock_mlflow.log_table.assert_called_once() - logged_data = mock_mlflow.log_table.call_args[0][0] - assert logged_data["step"] == [1, 2, 3] - assert logged_data["loss"] == [0.5, 0.3, 0.1] - assert logged_data["control"] == ["ctrl1", "ctrl2", "ctrl3"] + def test_logs_empty_summary(self) -> None: + """Should handle empty losses and controls.""" + log_table_summary(losses=[], controls=[], n_steps=0) diff --git a/uv.lock b/uv.lock index a52eba0aa6..1bb0f8092f 100644 --- a/uv.lock +++ b/uv.lock @@ -6,14 +6,18 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' 
and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] [manifest] @@ -630,27 +634,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/41/f03fca7144b4a20419cdec03feb2020d3a44fe815de9ee52f089dc59ba95/azure_storage_file_share-12.23.1-py3-none-any.whl", hash = "sha256:10f83e82e68fb071bed3302614e35027ac9e03b63d0d850b81671e08936b0134", size = 307616, upload-time = "2025-10-29T13:39:46.896Z" }, ] -[[package]] -name = "azureml-mlflow" -version = "1.60.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "azure-common" }, - { name = "azure-core" }, - { name = "azure-identity" }, - { name = "azure-mgmt-core" }, - { name = "azure-storage-blob" }, - { name = "cryptography" }, - { name = "jsonpickle" }, - { name = "mlflow-skinny" }, - { name = "msrest" }, - { name = "python-dateutil" }, - { name = "pytz" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/cb/b1f1ac36949958a15707f0d0f361e0a610523dc44cf82e64a404acb9797c/azureml_mlflow-1.60.0-py3-none-any.whl", hash = "sha256:9074fa389cf24f16f3aff7d7cda62a658c93b65a4aecc3dd50a5f1e45909687f", size = 1020403, upload-time = "2025-04-11T20:16:27.116Z" }, -] - [[package]] name = "babel" version = "2.17.0" @@ -766,15 +749,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/de/acae8e9f9a1f4bb393d41c8265898b0f29772e38eac14e9f69d191e2c006/blis-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:9e5fdf4211b1972400f8ff6dafe87cb689c5d84f046b4a76b207c0bd2270faaf", size = 6324695, upload-time = "2025-11-17T12:28:28.401Z" }, ] -[[package]] -name = "cachetools" -version = "6.2.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bc/1d/ede8680603f6016887c062a2cf4fc8fdba905866a3ab8831aa8aa651320c/cachetools-6.2.4.tar.gz", hash = "sha256:82c5c05585e70b6ba2d3ae09ea60b79548872185d2f24ae1f2709d37299fd607", size = 31731, upload-time = "2025-12-15T18:24:53.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/fc/1d7b80d0eb7b714984ce40efc78859c022cd930e402f599d8ca9e39c78a4/cachetools-6.2.4-py3-none-any.whl", hash = "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", size = 11551, upload-time = "2025-12-15T18:24:52.332Z" }, -] - [[package]] name = "catalogue" version = "2.0.10" @@ -1040,15 +1014,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/8a/c4bb04426d608be4a3171efa2e233d2c59a5c8937850c10d098e126df18e/cloudpathlib-0.23.0-py3-none-any.whl", hash = "sha256:8520b3b01468fee77de37ab5d50b1b524ea6b4a8731c35d1b7407ac0cd716002", size = 62755, upload-time = "2025-10-07T22:47:54.905Z" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -1104,7 +1069,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -1178,11 +1144,14 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", ] dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" }, @@ -1500,45 +1469,30 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/66/66/150e406a2db5535533aa3c946de58f0371f2e412e23f050c704588023e6e/cymem-2.0.13-cp314-cp314t-win_arm64.whl", hash = "sha256:e9027764dc5f1999fb4b4cabee1d0322c59e330c0a6485b436a68275f614277f", size = 39715, upload-time = "2025-11-14T14:58:24.773Z" }, ] -[[package]] -name = "databricks-sdk" -version = "0.76.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-auth" }, - { name = "protobuf" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/82/5efcfdca8779c84b5c6f61cc110d0938c9818e422f55c36a68d96b98c61f/databricks_sdk-0.76.0.tar.gz", hash = "sha256:fcfce4561b090b3c8e9cac2101f549766d9fb3bece31bb5720571919fa37d210", size = 822376, upload-time = "2025-12-17T17:11:31.907Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/96/ee7742b94f996560c57d6fb8d2e10eab3c489e8a72187369ed0917baf8aa/databricks_sdk-0.76.0-py3-none-any.whl", hash = "sha256:6696dda22bc52c8f50a50d24e6ccd1c855f92c0f68f5afe4eb2e77d5b1b1a65f", size = 774688, upload-time = "2025-12-17T17:11:29.925Z" }, -] - [[package]] name = "datasets" -version = "4.4.2" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, { name = "filelock" }, { name = "fsspec", extra = ["http"] }, - { name = "httpx" }, { name = "huggingface-hub" }, { name = "multiprocess" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "packaging" }, { name = "pandas" }, - { name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "pyarrow", version = "19.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "pyarrow", version = "24.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "pyyaml" }, { name = "requests" }, { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/54/9359803da96bc65439a28fbb014dc2c90b7d4d8034a93b72362b0d40191f/datasets-4.4.2.tar.gz", hash = "sha256:9de16e415c4ba4713eac0493f7c7dc74f3aa21599297f00cc6ddab409cb7b24b", size = 586474, upload-time = "2025-12-19T15:03:09.129Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/9d/348ed92110ba5f9b70b51ca1078d4809767a835aa2b7ce7e74ad2b98323d/datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1", size = 569566, upload-time = "2025-07-09T14:35:52.431Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/b5/fefa518c809de7bced5cddb7c21c010da66fa2ae494bda96844a280cc6ce/datasets-4.4.2-py3-none-any.whl", hash = "sha256:6f5ef3417504d9cd663c71c1b90b9a494ff4c2076a2cd6a6e40ceee6ad95befc", size = 512268, upload-time = "2025-12-19T15:03:07.087Z" }, + { url = "https://files.pythonhosted.org/packages/eb/62/eb8157afb21bd229c864521c1ab4fa8e9b4f1b06bafdd8c4668a7a31b5dd/datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d", size = 494825, upload-time = "2025-07-09T14:35:50.658Z" }, ] [[package]] @@ -1605,11 
+1559,11 @@ wheels = [ [[package]] name = "dill" -version = "0.4.0" +version = "0.3.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847, upload-time = "2024-01-27T23:42:16.145Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" }, + { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252, upload-time = "2024-01-27T23:42:14.239Z" }, ] [[package]] @@ -1630,20 +1584,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] -[[package]] -name = "docker" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "requests" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, -] - [[package]] name = "ecoji" version = "0.1.1" @@ -1749,19 +1689,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" }, ] -[[package]] -name = "flask-cors" -version = "6.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flask" }, - { name = "werkzeug" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/74/0fc0fa68d62f21daef41017dafab19ef4b36551521260987eb3a5394c7ba/flask_cors-6.0.2.tar.gz", hash = "sha256:6e118f3698249ae33e429760db98ce032a8bf9913638d085ca0f4c5534ad2423", size = 13472, upload-time = "2025-12-12T20:31:42.861Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/af/72ad54402e599152de6d067324c46fe6a4f531c7c65baf7e96c63db55eaf/flask_cors-6.0.2-py3-none-any.whl", hash = "sha256:e57544d415dfd7da89a9564e1e3a9e515042df76e12130641ca6f3f2f03b699a", size = 13257, 
upload-time = "2025-12-12T20:31:41.3Z" }, -] - [[package]] name = "fonttools" version = "4.61.1" @@ -1951,11 +1878,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2025.10.0" +version = "2025.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491, upload-time = "2025-03-07T21:47:56.461Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" }, + { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615, upload-time = "2025-03-07T21:47:54.809Z" }, ] [package.optional-dependencies] @@ -1963,80 +1890,6 @@ http = [ { name = "aiohttp" }, ] -[[package]] -name = "gitdb" -version = "4.0.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "smmap" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, -] - -[[package]] -name = "gitpython" -version = "3.1.46" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gitdb" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, -] - -[[package]] -name = "google-auth" -version = "2.48.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cryptography" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0c/41/242044323fbd746615884b1c16639749e73665b718209946ebad7ba8a813/google_auth-2.48.0.tar.gz", hash = "sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce", size = 326522, upload-time = "2026-01-26T19:22:47.157Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" }, -] - -[[package]] -name = "graphene" -version = "3.4.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "graphql-core" }, - { name = "graphql-relay" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cc/f6/bf62ff950c317ed03e77f3f6ddd7e34aaa98fe89d79ebd660c55343d8054/graphene-3.4.3.tar.gz", hash = "sha256:2a3786948ce75fe7e078443d37f609cbe5bb36ad8d6b828740ad3b95ed1a0aaa", size = 44739, upload-time = "2024-11-09T20:44:25.757Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/66/e0/61d8e98007182e6b2aca7cf65904721fb2e4bce0192272ab9cb6f69d8812/graphene-3.4.3-py2.py3-none-any.whl", hash = "sha256:820db6289754c181007a150db1f7fff544b94142b556d12e3ebc777a7bf36c71", size = 114894, upload-time = "2024-11-09T20:44:23.851Z" }, -] - -[[package]] -name = "graphql-core" -version = "3.2.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ac/9b/037a640a2983b09aed4a823f9cf1729e6d780b0671f854efa4727a7affbe/graphql_core-3.2.7.tar.gz", hash = "sha256:27b6904bdd3b43f2a0556dad5d579bdfdeab1f38e8e8788e555bdcb586a6f62c", size = 513484, upload-time = "2025-11-01T22:30:40.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/14/933037032608787fb92e365883ad6a741c235e0ff992865ec5d904a38f1e/graphql_core-3.2.7-py3-none-any.whl", hash = "sha256:17fc8f3ca4a42913d8e24d9ac9f08deddf0a0b2483076575757f6c412ead2ec0", size = 207262, upload-time = "2025-11-01T22:30:38.912Z" }, -] - -[[package]] -name = "graphql-relay" -version = "3.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "graphql-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/13/98fbf8d67552f102488ffc16c6f559ce71ea15f6294728d33928ab5ff14d/graphql-relay-3.2.0.tar.gz", hash = "sha256:1ff1c51298356e481a0be009ccdff249832ce53f30559c1338f22a0e0d17250c", size = 50027, upload-time = "2022-04-16T11:03:45.447Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/74/16/a4cf06adbc711bd364a73ce043b0b08d8fa5aae3df11b6ee4248bcdad2e0/graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5", size = 16940, upload-time = "2022-04-16T11:03:43.895Z" }, -] - [[package]] name = "greenlet" version = "3.3.0" @@ -2127,18 +1980,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/51/c936033e16d12b627ea334aaaaf42229c37620d0f15593456ab69ab48161/griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f", size = 142004, upload-time = "2026-02-09T19:09:40.561Z" }, ] -[[package]] -name = "gunicorn" -version = "23.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -2275,15 +2116,6 @@ http2 = [ { name = "h2" }, ] -[[package]] -name = "huey" -version = "2.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/29/3428d52eb8e85025e264a291641a9f9d6407cc1e51d1b630f6ac5815999a/huey-2.6.0.tar.gz", hash = "sha256:8d11f8688999d65266af1425b831f6e3773e99415027177b8734b0ffd5e251f6", size = 221068, upload-time = "2026-01-06T03:01:02.055Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/34/fae9ac8f1c3a552fd3f7ff652b94c78d219dedc5fce0c0a4232457760a00/huey-2.6.0-py3-none-any.whl", hash = "sha256:1b9df9d370b49c6d5721ba8a01ac9a787cf86b3bdc584e4679de27b920395c3f", size = 76951, upload-time = "2026-01-06T03:01:00.808Z" }, -] - [[package]] name = "huggingface-hub" version = "0.36.0" @@ -2383,7 +2215,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] dependencies = [ { name = "colorama", marker = "python_full_version < '3.11' and sys_platform == 'win32'" }, @@ -2412,11 +2245,14 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 
'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, @@ -2616,15 +2452,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, ] -[[package]] -name = "joblib" -version = "1.5.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, -] - [[package]] name = "json5" version = "0.13.0" @@ -2634,15 +2461,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/9e/038522f50ceb7e74f1f991bf1b699f24b0c2bbe7c390dd36ad69f4582258/json5-0.13.0-py3-none-any.whl", hash = "sha256:9a08e1dd65f6a4d4c6fa82d216cf2477349ec2346a38fd70cc11d2557499fbcc", size = 36163, upload-time = "2026-01-01T19:42:13.962Z" }, ] -[[package]] -name = "jsonpickle" -version = "4.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/a6/d07afcfdef402900229bcca795f80506b207af13a838d4d99ad45abf530c/jsonpickle-4.1.1.tar.gz", hash = "sha256:f86e18f13e2b96c1c1eede0b7b90095bbb61d99fedc14813c44dc2f361dbbae1", size = 316885, upload-time = "2025-06-02T20:36:11.57Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/73/04df8a6fa66d43a9fd45c30f283cc4afff17da671886e451d52af60bdc7e/jsonpickle-4.1.1-py3-none-any.whl", hash = "sha256:bb141da6057898aa2438ff268362b126826c812a1721e31cf08a6e142910dc91", size = 47125, upload-time = "2025-06-02T20:36:08.647Z" }, -] - [[package]] name = "jsonpointer" version = "3.0.0" @@ -3424,90 +3242,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/8a/18d4ff2c7bd83f30d6924bd4ad97abf418488c3f908dea228d6f0961ad68/ml_collections-1.1.0-py3-none-any.whl", hash = "sha256:23b6fa4772aac1ae745a96044b925a5746145a70734f087eaca6626e92c05cbc", size = 76707, upload-time = "2025-04-17T08:24:59.038Z" }, ] -[[package]] -name = "mlflow" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "alembic" }, - { name = "cryptography" }, - { name = "docker" }, - { name = "flask" }, - { name = "flask-cors" }, - { name = "graphene" }, - { name = "gunicorn", marker = "sys_platform != 'win32'" }, - { name = "huey" }, - { 
name = "matplotlib" }, - { name = "mlflow-skinny" }, - { name = "mlflow-tracing" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "pandas" }, - { name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "skops" }, - { name = "sqlalchemy" }, - { name = "waitress", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/34/e328c073cd32c186fb242a957e5bade82433c06bc45b7d1695bf4d02f166/mlflow-3.11.1.tar.gz", hash = "sha256:84e54c4be91b5b2a19039a2673fe688b1d7307ceddacc08af51f8df05b19ee56", size = 9797469, upload-time = "2026-04-07T14:26:58.463Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/62/96826c340354638dfedcbdbcd35d67754566bd45f6592300e0c215c80e30/mlflow-3.11.1-py3-none-any.whl", hash = "sha256:8f6bf1238ac04f97664c229dd480380c5c254a78bdb3c0e433e3a0397508b1af", size = 10479141, upload-time = "2026-04-07T14:26:55.709Z" }, -] - -[[package]] -name = "mlflow-skinny" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "click" }, - { name = "cloudpickle" }, - { name = "databricks-sdk" }, - { name = "fastapi" }, - { name = "gitpython" }, - { name = "importlib-metadata" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sqlparse" }, - { name = "typing-extensions" }, - { name = "uvicorn" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/40/77/fe2027ddad9e52ed1ac360fbc262169e6366f6678632e350cbd0d901bb9b/mlflow_skinny-3.11.1.tar.gz", hash = "sha256:86ce63491349f6713afc8a4ef0bf77a8314d0e79e03753cb150d6c860a0b0475", size = 2642799, upload-time = "2026-04-07T14:26:43.818Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/a7/e61ec397b34dc3c9e91572f45e41617f429d5c524d38a4e1aa2316ee1b5e/mlflow_skinny-3.11.1-py3-none-any.whl", hash = "sha256:82ffd5f6980320b4ac19f741e7a754faa1d01707e632b002ea68e04fd25a0535", size = 3171551, upload-time = "2026-04-07T14:26:41.762Z" }, -] - -[[package]] -name = "mlflow-tracing" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "databricks-sdk" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "pydantic" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/1b/77/73af163432f3c66e2d213045250972e504a6683c76f63dd1abfba441a16a/mlflow_tracing-3.11.1.tar.gz", hash = "sha256:cb63cee16385d081467ec5bee4807fe1af59ddfdf04be4c79e7a7813b1002193", size = 1314550, upload-time = "2026-04-07T14:26:32.785Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ab/d980c84e7df4224ab8db2457afbe135b430f371ca081a37cf89f8ef18ca1/mlflow_tracing-3.11.1-py3-none-any.whl", hash = "sha256:fa82df64dacf8293b714ae666440fe7c1902c6470c024df389bb91e9de3106d9", size = 1575790, upload-time = "2026-04-07T14:26:30.804Z" }, -] - [[package]] name = "mock-alchemy" version = "0.2.6" @@ -3711,25 +3445,20 @@ wheels = [ [[package]] name = "multiprocess" -version = "0.70.18" +version = "0.70.16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603, upload-time = "2024-01-28T18:52:34.85Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/f8/7f9a8f08bf98cea1dfaa181e05cc8bbcb59cecf044b5a9ac3cce39f9c449/multiprocess-0.70.18-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25d4012dcaaf66b9e8e955f58482b42910c2ee526d532844d8bcf661bbc604df", size = 135083, upload-time = "2025-04-17T03:11:04.223Z" }, - { url = "https://files.pythonhosted.org/packages/e5/03/b7b10dbfc17b2b3ce07d4d30b3ba8367d0ed32d6d46cd166e298f161dd46/multiprocess-0.70.18-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:06b19433de0d02afe5869aec8931dd5c01d99074664f806c73896b0d9e527213", size = 135128, upload-time = "2025-04-17T03:11:06.045Z" }, - { url = "https://files.pythonhosted.org/packages/c1/a3/5f8d3b9690ea5580bee5868ab7d7e2cfca74b7e826b28192b40aa3881cdc/multiprocess-0.70.18-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6fa1366f994373aaf2d4738b0f56e707caeaa05486e97a7f71ee0853823180c2", size = 135132, upload-time = "2025-04-17T03:11:07.533Z" }, - { url = "https://files.pythonhosted.org/packages/55/4d/9af0d1279c84618bcd35bf5fd7e371657358c7b0a523e54a9cffb87461f8/multiprocess-0.70.18-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b8940ae30139e04b076da6c5b83e9398585ebdf0f2ad3250673fef5b2ff06d6", size = 144695, upload-time = "2025-04-17T03:11:09.161Z" }, - { url = "https://files.pythonhosted.org/packages/17/bf/87323e79dd0562474fad3373c21c66bc6c3c9963b68eb2a209deb4c8575e/multiprocess-0.70.18-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0929ba95831adb938edbd5fb801ac45e705ecad9d100b3e653946b7716cb6bd3", size = 144742, upload-time = "2025-04-17T03:11:10.072Z" }, - { url = "https://files.pythonhosted.org/packages/dd/74/cb8c831e58dc6d5cf450b17c7db87f14294a1df52eb391da948b5e0a0b94/multiprocess-0.70.18-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4d77f8e4bfe6c6e2e661925bbf9aed4d5ade9a1c6502d5dfc10129b9d1141797", size = 144745, upload-time = "2025-04-17T03:11:11.453Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" }, - { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" }, - { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" }, - { url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" }, - { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" }, - { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, + { url = "https://files.pythonhosted.org/packages/ef/76/6e712a2623d146d314f17598df5de7224c85c0060ef63fd95cc15a25b3fa/multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee", size = 134980, upload-time = "2024-01-28T18:52:15.731Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ab/1e6e8009e380e22254ff539ebe117861e5bdb3bff1fc977920972237c6c7/multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec", size = 134982, upload-time = "2024-01-28T18:52:17.783Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824, upload-time = "2024-01-28T18:52:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519, upload-time = "2024-01-28T18:52:28.115Z" }, + { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" }, + { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = 
"sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" }, + { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, ] [[package]] @@ -3867,7 +3596,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } wheels = [ @@ -3883,11 +3613,14 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == 
'3.11.*' and sys_platform == 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } wheels = [ @@ -3938,13 +3671,16 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ @@ -4011,7 +3747,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= 
'3.14' and sys_platform == 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } wheels = [ @@ -4125,7 +3862,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -4136,7 +3873,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -4163,9 +3900,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, - { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, - { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, 
upload-time = "2025-03-07T01:47:16.273Z" }, @@ -4176,7 +3913,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -4476,18 +4213,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/98/c637d9e5cab1355d6765de2304199a1d79a43aa94c33d8eddb475327d81a/opentelemetry_instrumentation_wsgi-0.60b1-py3-none-any.whl", hash = "sha256:5e7b432778ebf5a39af136227884a6ab2efc3c4e73e2dbb1d05ecf03ea196705", size = 14583, upload-time = "2025-12-11T13:36:33.164Z" }, ] -[[package]] -name = "opentelemetry-proto" -version = "1.39.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, -] - [[package]] name = "opentelemetry-resource-detector-azure" version = "0.1.5" @@ -4547,11 +4272,11 @@ wheels = [ [[package]] name = "packaging" -version = "25.0" +version = "24.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" }, ] [[package]] @@ -4639,7 +4364,7 @@ name = "pexpect" version = "4.9.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ - { name = "ptyprocess" }, + { name = "ptyprocess", marker = "sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } wheels = [ @@ -4857,18 +4582,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/0d/431bb85252119f5d2260417fa7d164619b31eed8f1725b364dc0ade43a8e/preshed-3.0.12-cp314-cp314t-win_arm64.whl", hash = "sha256:c0c0d3b66b4c1e40aa6042721492f7b07fc9679ab6c361bc121aa54a1c3ef63f", size = 114839, upload-time = "2025-11-17T13:00:19.513Z" }, ] -[[package]] -name = "prettytable" -version = "3.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/79/45/b0847d88d6cfeb4413566738c8bbf1e1995fad3d42515327ff32cc1eb578/prettytable-3.17.0.tar.gz", hash = "sha256:59f2590776527f3c9e8cf9fe7b66dd215837cca96a9c39567414cbc632e8ddb0", size = 67892, upload-time = "2025-11-14T17:33:20.212Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/8c/83087ebc47ab0396ce092363001fa37c17153119ee282700c0713a195853/prettytable-3.17.0-py3-none-any.whl", hash = "sha256:aad69b294ddbe3e1f95ef8886a060ed1666a0b83018bbf56295f6f226c43d287", size = 34433, upload-time = "2025-11-14T17:33:19.093Z" }, -] - [[package]] name = "prometheus-client" version = "0.23.1" @@ -5004,21 +4717,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] -[[package]] -name = "protobuf" -version = "6.33.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/25/7c72c307aafc96fa87062aa6291d9f7c94836e43214d43722e86037aac02/protobuf-6.33.5.tar.gz", hash = "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", size = 444465, upload-time = "2026-01-29T21:51:33.494Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/79/af92d0a8369732b027e6d6084251dd8e782c685c72da161bd4a2e00fbabb/protobuf-6.33.5-cp310-abi3-win32.whl", hash = "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b", size = 425769, upload-time = "2026-01-29T21:51:21.751Z" }, - { url = "https://files.pythonhosted.org/packages/55/75/bb9bc917d10e9ee13dee8607eb9ab963b7cf8be607c46e7862c748aa2af7/protobuf-6.33.5-cp310-abi3-win_amd64.whl", hash = "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", size = 437118, upload-time = "2026-01-29T21:51:24.022Z" }, - { url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", size = 427766, upload-time = "2026-01-29T21:51:25.413Z" }, - { url = "https://files.pythonhosted.org/packages/4e/b1/c79468184310de09d75095ed1314b839eb2f72df71097db9d1404a1b2717/protobuf-6.33.5-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", size = 324638, upload-time = "2026-01-29T21:51:26.423Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/f5/65d838092fd01c44d16037953fd4c2cc851e783de9b8f02b27ec4ffd906f/protobuf-6.33.5-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", size = 339411, upload-time = "2026-01-29T21:51:27.446Z" }, - { url = "https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", size = 323465, upload-time = "2026-01-29T21:51:28.925Z" }, - { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, -] - [[package]] name = "psutil" version = "7.2.1" @@ -5067,139 +4765,121 @@ wheels = [ [[package]] name = "pyarrow" -version = "21.0.0" +version = "19.0.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", + "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" }, - { url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = 
"sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" }, - { url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" }, - { url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" }, - { url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = "2025-07-18T00:54:51.686Z" }, - { url = "https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" }, - { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" }, - { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" }, - { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" }, - { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" }, - { url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" }, - { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, 
upload-time = "2025-07-18T00:55:28.231Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" }, - { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, - { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, - { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, - { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, - { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, - { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, - { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, - { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, - { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, - { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, - { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, - { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, - { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, - { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, - { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, - { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, - { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, - { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, - { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or 
(python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/09/a9046344212690f0632b9c709f9bf18506522feb333c894d0de81d62341a/pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e", size = 1129437, upload-time = "2025-02-18T18:55:57.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/01/b23b514d86b839956238d3f8ef206fd2728eee87ff1b8ce150a5678d9721/pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69", size = 30688914, upload-time = "2025-02-18T18:51:37.575Z" }, + { url = "https://files.pythonhosted.org/packages/c6/68/218ff7cf4a0652a933e5f2ed11274f724dd43b9813cb18dd72c0a35226a2/pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec", size = 32102866, upload-time = "2025-02-18T18:51:44.358Z" }, + { url = "https://files.pythonhosted.org/packages/98/01/c295050d183014f4a2eb796d7d2bbfa04b6cccde7258bb68aacf6f18779b/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89", size = 41147682, upload-time = "2025-02-18T18:51:49.481Z" }, + { url = "https://files.pythonhosted.org/packages/40/17/a6c3db0b5f3678f33bbb552d2acbc16def67f89a72955b67b0109af23eb0/pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a", size = 42179192, upload-time = "2025-02-18T18:51:56.265Z" }, + { url = "https://files.pythonhosted.org/packages/cf/75/c7c8e599300d8cebb6cb339014800e1c720c9db2a3fcb66aa64ec84bac72/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a", size = 40517272, upload-time = "2025-02-18T18:52:02.969Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c9/68ab123ee1528699c4d5055f645ecd1dd68ff93e4699527249d02f55afeb/pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608", size = 42069036, upload-time = "2025-02-18T18:52:10.173Z" }, + { url = "https://files.pythonhosted.org/packages/54/e3/d5cfd7654084e6c0d9c3ce949e5d9e0ccad569ae1e2d5a68a3ec03b2be89/pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866", size = 25277951, upload-time = "2025-02-18T18:52:15.459Z" }, + { url = "https://files.pythonhosted.org/packages/a0/55/f1a8d838ec07fe3ca53edbe76f782df7b9aafd4417080eebf0b42aab0c52/pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90", size = 30713987, upload-time = "2025-02-18T18:52:20.463Z" }, + { url = "https://files.pythonhosted.org/packages/13/12/428861540bb54c98a140ae858a11f71d041ef9e501e6b7eb965ca7909505/pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00", size = 32135613, upload-time = "2025-02-18T18:52:25.29Z" }, + { url = "https://files.pythonhosted.org/packages/2f/8a/23d7cc5ae2066c6c736bce1db8ea7bc9ac3ef97ac7e1c1667706c764d2d9/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae", size = 41149147, upload-time = "2025-02-18T18:52:30.975Z" }, + { url = "https://files.pythonhosted.org/packages/a2/7a/845d151bb81a892dfb368bf11db584cf8b216963ccce40a5cf50a2492a18/pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5", size = 42178045, upload-time = "2025-02-18T18:52:36.859Z" }, + { url = "https://files.pythonhosted.org/packages/a7/31/e7282d79a70816132cf6cae7e378adfccce9ae10352d21c2fecf9d9756dd/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3", size = 40532998, upload-time = "2025-02-18T18:52:42.578Z" }, + { url = "https://files.pythonhosted.org/packages/b8/82/20f3c290d6e705e2ee9c1fa1d5a0869365ee477e1788073d8b548da8b64c/pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6", size = 42084055, upload-time = "2025-02-18T18:52:48.749Z" }, + { url = "https://files.pythonhosted.org/packages/ff/77/e62aebd343238863f2c9f080ad2ef6ace25c919c6ab383436b5b81cbeef7/pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466", size = 25283133, upload-time = "2025-02-18T18:52:54.549Z" }, + { url = "https://files.pythonhosted.org/packages/78/b4/94e828704b050e723f67d67c3535cf7076c7432cd4cf046e4bb3b96a9c9d/pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b", size = 30670749, upload-time = "2025-02-18T18:53:00.062Z" }, + { url = "https://files.pythonhosted.org/packages/7e/3b/4692965e04bb1df55e2c314c4296f1eb12b4f3052d4cf43d29e076aedf66/pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294", size = 32128007, upload-time = "2025-02-18T18:53:06.581Z" }, + { url = "https://files.pythonhosted.org/packages/22/f7/2239af706252c6582a5635c35caa17cb4d401cd74a87821ef702e3888957/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14", size = 41144566, upload-time = "2025-02-18T18:53:11.958Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e3/c9661b2b2849cfefddd9fd65b64e093594b231b472de08ff658f76c732b2/pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34", size = 42202991, upload-time = "2025-02-18T18:53:17.678Z" }, + { url = "https://files.pythonhosted.org/packages/fe/4f/a2c0ed309167ef436674782dfee4a124570ba64299c551e38d3fdaf0a17b/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6", size = 40507986, upload-time = "2025-02-18T18:53:26.263Z" }, + { url = "https://files.pythonhosted.org/packages/27/2e/29bb28a7102a6f71026a9d70d1d61df926887e36ec797f2e6acfd2dd3867/pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832", size = 42087026, upload-time = "2025-02-18T18:53:33.063Z" }, + { url = "https://files.pythonhosted.org/packages/16/33/2a67c0f783251106aeeee516f4806161e7b481f7d744d0d643d2f30230a5/pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960", size = 25250108, upload-time = "2025-02-18T18:53:38.462Z" }, + { url = "https://files.pythonhosted.org/packages/2b/8d/275c58d4b00781bd36579501a259eacc5c6dfb369be4ddeb672ceb551d2d/pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c", size = 30653552, upload-time = "2025-02-18T18:53:44.357Z" }, + { url = "https://files.pythonhosted.org/packages/a0/9e/e6aca5cc4ef0c7aec5f8db93feb0bde08dbad8c56b9014216205d271101b/pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae", size = 32103413, upload-time = "2025-02-18T18:53:52.971Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fa/a7033f66e5d4f1308c7eb0dfcd2ccd70f881724eb6fd1776657fdf65458f/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4", size = 41134869, upload-time = "2025-02-18T18:53:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/2d/92/34d2569be8e7abdc9d145c98dc410db0071ac579b92ebc30da35f500d630/pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2", size = 42192626, upload-time = "2025-02-18T18:54:06.062Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1f/80c617b1084fc833804dc3309aa9d8daacd46f9ec8d736df733f15aebe2c/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6", size = 40496708, upload-time = "2025-02-18T18:54:12.347Z" }, + { url = "https://files.pythonhosted.org/packages/e6/90/83698fcecf939a611c8d9a78e38e7fed7792dcc4317e29e72cf8135526fb/pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136", size = 42075728, upload-time = "2025-02-18T18:54:19.364Z" }, + { url = "https://files.pythonhosted.org/packages/40/49/2325f5c9e7a1c125c01ba0c509d400b152c972a47958768e4e35e04d13d8/pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef", size = 25242568, upload-time = "2025-02-18T18:54:25.846Z" }, + { url = "https://files.pythonhosted.org/packages/3f/72/135088d995a759d4d916ec4824cb19e066585b4909ebad4ab196177aa825/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0", size = 30702371, upload-time = "2025-02-18T18:54:30.665Z" }, + { url = "https://files.pythonhosted.org/packages/2e/01/00beeebd33d6bac701f20816a29d2018eba463616bbc07397fdf99ac4ce3/pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9", size = 32116046, upload-time = "2025-02-18T18:54:35.995Z" }, + { url = "https://files.pythonhosted.org/packages/1f/c9/23b1ea718dfe967cbd986d16cf2a31fe59d015874258baae16d7ea0ccabc/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3", size = 41091183, upload-time = "2025-02-18T18:54:42.662Z" }, + { url = "https://files.pythonhosted.org/packages/3a/d4/b4a3aa781a2c715520aa8ab4fe2e7fa49d33a1d4e71c8fc6ab7b5de7a3f8/pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6", size = 42171896, upload-time = "2025-02-18T18:54:49.808Z" }, + { url = "https://files.pythonhosted.org/packages/23/1b/716d4cd5a3cbc387c6e6745d2704c4b46654ba2668260d25c402626c5ddb/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a", size = 40464851, upload-time = "2025-02-18T18:54:57.073Z" }, + { url = "https://files.pythonhosted.org/packages/ed/bd/54907846383dcc7ee28772d7e646f6c34276a17da740002a5cefe90f04f7/pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8", size = 42085744, upload-time = "2025-02-18T18:55:08.562Z" }, ] [[package]] name = "pyarrow" -version = "22.0.0" +version = "24.0.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" }, - { url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" }, - { url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" }, - { url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" }, - { url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" }, - { url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" }, - { url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" }, - { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" }, - { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" }, - { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" }, - { url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" }, - { url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" }, - { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = 
"2025-10-24T10:05:29.485Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" }, - { url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" }, - { url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" }, - { url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" }, - { url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" }, - { url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" }, - { url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" }, - { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" }, - { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" }, - { url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" }, - { url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" }, - { url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" }, - { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" }, - { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" }, - { url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" }, - { url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" }, - { url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" }, - { url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" }, - { url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" }, - { url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" }, - { url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" }, - { url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" }, - { url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" }, - { url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" }, - { url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" }, - { url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" }, - { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, -] - -[[package]] -name = "pyasn1" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -sdist = 
{ url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, -] - -[[package]] -name = "pyasn1-modules" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", +] +sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/bf/a34fee1d624152124fa8355c42f34195ad5fe5233ce5bb87946432047d52/pyarrow-24.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7c2b98645d576a0b9616892ead22b64a83a5f043c5e2ca15ebcefcb5b70c80cb", size = 35076681, upload-time = "2026-04-21T08:51:46.845Z" }, + { url = "https://files.pythonhosted.org/packages/1d/41/64180033d7027afce12dc96d0fe1f504c6fa112190582b458acea2399530/pyarrow-24.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:644a246325b8c69c595ad1dd4b463eba4b0cdb731370e4a86137d433208d6147", size = 36684260, upload-time = "2026-04-21T08:51:53.642Z" }, + { url = "https://files.pythonhosted.org/packages/57/02/9b9320e673dd8a99411fac78690f3df92f6dd6f59754c750110bca66d64e/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:3a577bd840ca83f646f0a625dbc571dba7044c43c2d1503afc378b570954345c", size = 45698566, upload-time = "2026-04-21T10:46:02.133Z" }, + { url = "https://files.pythonhosted.org/packages/67/33/f75e91b9a64c3f33c787e263c93b871ad91b8a4a68c1d5cebddd9840e835/pyarrow-24.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:e3268e43984d0b1a185c89b4cfff282a7ead12fc93f56cfd7088bdbcbe727041", size = 48835562, upload-time = "2026-04-21T10:46:10.278Z" }, + { url = "https://files.pythonhosted.org/packages/a5/63/097510448e47e4091faa41c43ba92f97cecaab8f4535b56a3d149578f634/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2392d954fcb920f42d230284b677605e4e2fbb11f2821e823e642abd67fbb491", size = 49394997, upload-time = "2026-04-21T10:46:18.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/6b/c047d6222ab279024a062742d1807e2fbaf27bba88a98637299ff47b9236/pyarrow-24.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bec9373df11544592b0ba7ec2af0e35059e5f0e7647c6183a854dedd193298f1", size = 51911424, upload-time = "2026-04-21T10:46:25.347Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ba/464cc70761c2a525d97ebd84e21c31ebd47f3ef4bdcee117009f51c46f24/pyarrow-24.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:c42ab9439498270139cc63e18847a02afe5c8b3ed9c931266533cfe378bd3591", size = 27251730, upload-time = "2026-04-21T10:46:30.913Z" }, + { url = "https://files.pythonhosted.org/packages/62/c9/a47ab7ece0d86cbe6678418a0fbd1ac4bb493b9184a3891dfa0e7f287ae0/pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74", size = 35068898, upload-time = "2026-04-21T10:46:36.599Z" }, + { url = "https://files.pythonhosted.org/packages/d1/bc/8db86617a9a58008acf8913d6fed68ea2a46acb6de928db28d724c891a68/pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3", size = 36679915, upload-time = "2026-04-21T10:46:42.602Z" }, + { url = "https://files.pythonhosted.org/packages/eb/8e/fb178720400ef69db251eb4a9c3ccf4af269bc1feb5055529b8fc87170d1/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868", size = 45697931, upload-time = "2026-04-21T10:46:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/f3/27/99c42abe8e21b44f4917f62631f3aa31404882a2c41d8a4cd5c110e13d52/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e", size = 48837449, upload-time = "2026-04-21T10:46:55.329Z" }, + { url = "https://files.pythonhosted.org/packages/36/b6/333749e2666e9032891125bf9c691146e92901bece62030ac1430e2e7c88/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57", size = 49395949, upload-time = "2026-04-21T10:47:01.869Z" }, + { url = "https://files.pythonhosted.org/packages/17/25/c5201706a2dd374e8ba6ee3fd7a8c89fb7ffc16eed5217a91fd2bd7f7626/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c", size = 51912986, upload-time = "2026-04-21T10:47:09.872Z" }, + { url = "https://files.pythonhosted.org/packages/f8/d2/4d1bbba65320b21a49678d6fbdc6ff7c649251359fdcfc03568c4136231d/pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981", size = 27255371, upload-time = "2026-04-21T10:47:15.943Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a9/9686d9f07837f91f775e8932659192e02c74f9d8920524b480b85212cc68/pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810", size = 34981559, upload-time = "2026-04-21T10:47:22.17Z" }, + { url = "https://files.pythonhosted.org/packages/80/b6/0ddf0e9b6ead3474ab087ae598c76b031fc45532bf6a63f3a553440fb258/pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a", size = 36663654, upload-time = "2026-04-21T10:47:28.315Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/3b/926382efe8ce27ba729071d3566ade6dfb86bdf112f366000196b2f5780a/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66", size = 45679394, upload-time = "2026-04-21T10:47:34.821Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/829f7d9dfd37c207206081d6dad474d81dde29952401f07f2ba507814818/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb", size = 48863122, upload-time = "2026-04-21T10:47:42.056Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e8/f88ce625fe8babaae64e8db2d417c7653adb3019b08aae85c5ed787dc816/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e", size = 49376032, upload-time = "2026-04-21T10:47:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, + { url = "https://files.pythonhosted.org/packages/66/1c/e3e72c8014ad2743ca64a701652c733cc5cbcee15c0463a32a8c55518d9e/pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826", size = 27355660, upload-time = "2026-04-21T10:48:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, + { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, + { url = "https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, + { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, + { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, + { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, + { url = "https://files.pythonhosted.org/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997, upload-time = "2026-04-21T10:49:48.796Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720, upload-time = "2026-04-21T10:49:55.858Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852, upload-time = "2026-04-21T10:50:04.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852, upload-time = "2026-04-21T10:50:12.293Z" }, + { url = "https://files.pythonhosted.org/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207, upload-time = "2026-04-21T10:50:20.677Z" }, + { url = "https://files.pythonhosted.org/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117, upload-time = "2026-04-21T10:50:29.14Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155, upload-time = "2026-04-21T10:51:22.337Z" }, + { url = "https://files.pythonhosted.org/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387, upload-time = "2026-04-21T10:50:35.552Z" }, + { url = "https://files.pythonhosted.org/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102, upload-time = "2026-04-21T10:50:42.417Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118, upload-time = "2026-04-21T10:50:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765, upload-time = "2026-04-21T10:50:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890, upload-time = "2026-04-21T10:51:02.439Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250, upload-time = "2026-04-21T10:51:10.576Z" }, + { url = "https://files.pythonhosted.org/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282, upload-time = "2026-04-21T10:51:16.815Z" }, ] [[package]] @@ -5505,6 +5185,7 @@ dependencies = [ { name = "openai" }, { name = "openpyxl" }, { name = "pillow" 
}, + { name = "pyarrow", version = "24.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "pydantic" }, { name = "pyjwt", extra = ["crypto"] }, { name = "pyodbc" }, @@ -5532,12 +5213,10 @@ all = [ { name = "av" }, { name = "azure-ai-ml" }, { name = "azure-cognitiveservices-speech" }, - { name = "azureml-mlflow" }, { name = "flask" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "ml-collections" }, - { name = "mlflow" }, { name = "ollama" }, { name = "opencv-python" }, { name = "playwright" }, @@ -5552,9 +5231,7 @@ fairness-bias = [ gcg = [ { name = "accelerate" }, { name = "azure-ai-ml" }, - { name = "azureml-mlflow" }, { name = "ml-collections" }, - { name = "mlflow" }, { name = "sentencepiece" }, { name = "torch" }, ] @@ -5626,8 +5303,6 @@ requires-dist = [ { name = "azure-core", specifier = ">=1.38.0" }, { name = "azure-identity", specifier = ">=1.19.0" }, { name = "azure-storage-blob", specifier = ">=12.19.0" }, - { name = "azureml-mlflow", marker = "extra == 'all'", specifier = ">=1.60.0" }, - { name = "azureml-mlflow", marker = "extra == 'gcg'", specifier = ">=1.60.0" }, { name = "base2048", specifier = ">=0.1.3" }, { name = "colorama", specifier = ">=0.4.6" }, { name = "confusable-homoglyphs", specifier = ">=3.3.1" }, @@ -5643,8 +5318,6 @@ requires-dist = [ { name = "jupyter", marker = "extra == 'all'", specifier = ">=1.1.1" }, { name = "ml-collections", marker = "extra == 'all'", specifier = ">=1.1.0" }, { name = "ml-collections", marker = "extra == 'gcg'", specifier = ">=1.1.0" }, - { name = "mlflow", marker = "extra == 'all'", specifier = ">=3.11.1" }, - { name = "mlflow", marker = "extra == 'gcg'", specifier = ">=3.11.1" }, { name = "numpy", marker = "python_full_version < '3.14'", specifier = ">=1.26.0" }, { name = "numpy", marker = "python_full_version >= '3.14'", specifier = ">=2.3.0" }, { name = "ollama", marker = "extra == 'all'", specifier = ">=0.5.1" }, @@ -5656,6 +5329,7 @@ requires-dist = [ { name = "pillow", specifier = ">=12.2.0" }, { name = "playwright", marker = "extra == 'all'", specifier = ">=1.49.0" }, { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.49.0" }, + { name = "pyarrow", marker = "python_full_version >= '3.14'", specifier = ">=22.0.0" }, { name = "pydantic", specifier = ">=2.11.5" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.8.0" }, { name = "pyodbc", specifier = ">=5.1.0" }, @@ -5843,28 +5517,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] -[[package]] -name = "pywin32" -version = "311" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/40/44efbb0dfbd33aca6a6483191dae0716070ed99e2ecb0c53683f400a0b4f/pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3", size = 8760432, upload-time = "2025-07-14T20:13:05.9Z" }, - { url = "https://files.pythonhosted.org/packages/5e/bf/360243b1e953bd254a82f12653974be395ba880e7ec23e3731d9f73921cc/pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b", size = 9590103, upload-time = "2025-07-14T20:13:07.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/38/d290720e6f138086fb3d5ffe0b6caa019a791dd57866940c82e4eeaf2012/pywin32-311-cp310-cp310-win_arm64.whl", hash = "sha256:0502d1facf1fed4839a9a51ccbcc63d952cf318f78ffc00a7e78528ac27d7a2b", size = 8778557, upload-time = "2025-07-14T20:13:11.11Z" }, - { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, - { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, - { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, - { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, - { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, - { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, - { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, - { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, - { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = 
"2025-07-14T20:13:34.312Z" }, - { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, -] - [[package]] name = "pywinpty" version = "3.0.2" @@ -6024,10 +5676,12 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] dependencies = [ { name = "attrs", marker = "python_full_version < '3.12'" }, @@ -6048,8 +5702,10 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", ] dependencies = [ { name = "attrs", marker = "python_full_version >= '3.12'" }, @@ -6389,18 +6045,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - [[package]] name = "ruff" version = "0.14.10" @@ -6453,117 +6097,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/5b/632a58724221ef03d78ab65062e82a1010e1bef8e8e0b9d7c6d7b8044841/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473b32699f4200e69801bf5abf93f1a4ecd432a70984df164fc22ccf39c4a6f3", size = 531885, upload-time = "2025-11-19T15:18:27.146Z" }, ] -[[package]] -name = "scikit-learn" -version = "1.7.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11' and sys_platform == 'darwin'", - "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "joblib", marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, - { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, - { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, - { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, - { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, - { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, - { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, - { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, - { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, - { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, - { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, - { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, - { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, - { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, - { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, - { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, - { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, - { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, - { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, - { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, - { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, - { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = 
"2025-09-09T08:21:13.602Z" }, - { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, - { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, - { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, - { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, - { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, - { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, -] - -[[package]] -name = "scikit-learn" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", - "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "joblib", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" }, - { name = 
"numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/92/53ea2181da8ac6bf27170191028aee7251f8f841f8d3edbfdcaf2008fde9/scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da", size = 8595835, upload-time = "2025-12-10T07:07:39.385Z" }, - { url = "https://files.pythonhosted.org/packages/01/18/d154dc1638803adf987910cdd07097d9c526663a55666a97c124d09fb96a/scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1", size = 8080381, upload-time = "2025-12-10T07:07:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/8a/44/226142fcb7b7101e64fdee5f49dbe6288d4c7af8abf593237b70fca080a4/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b", size = 8799632, upload-time = "2025-12-10T07:07:43.899Z" }, - { url = "https://files.pythonhosted.org/packages/36/4d/4a67f30778a45d542bbea5db2dbfa1e9e100bf9ba64aefe34215ba9f11f6/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1", size = 9103788, upload-time = "2025-12-10T07:07:45.982Z" }, - { url = "https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b", size = 8081706, upload-time = "2025-12-10T07:07:48.111Z" }, - { url = "https://files.pythonhosted.org/packages/3d/46/5416595bb395757f754feb20c3d776553a386b661658fb21b7c814e89efe/scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961", size = 7688451, upload-time = "2025-12-10T07:07:49.873Z" }, - { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" }, - { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" }, - { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" }, - { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" }, - { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" }, - { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" }, - { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" }, - { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" }, - { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" }, - { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" }, - { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" }, - { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" }, - { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" }, - { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" }, - { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" }, - { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" }, - { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" }, - { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" }, - { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" }, - { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" }, - { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" }, - { url = 
"https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" }, - { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" }, - { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" }, - { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" }, - { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, -] - [[package]] name = "scipy" version = "1.15.3" @@ -6571,7 +6104,8 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version < '3.11' and sys_platform == 'win32'", ] dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -6634,11 +6168,14 @@ resolution-markers = [ "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 
'win32')", + "python_full_version >= '3.14' and sys_platform == 'win32'", + "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'win32'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')", + "python_full_version == '3.11.*' and sys_platform == 'win32'", ] dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" }, @@ -6817,25 +6354,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "skops" -version = "0.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "packaging" }, - { name = "prettytable" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/0c/5ec987633e077dd0076178ea6ade2d6e57780b34afea0b497fb507d7a1ed/skops-0.13.0.tar.gz", hash = "sha256:66949fd3c95cbb5c80270fbe40293c0fe1e46cb4a921860e42584dd9c20ebeb1", size = 581312, upload-time = "2025-08-06T09:48:14.916Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/e8/6a2b2030f0689f894432b9c2f0357f2f3286b2a00474827e04b8fe9eea13/skops-0.13.0-py3-none-any.whl", hash = "sha256:55e2cccb18c86f5916e4cfe5acf55ed7b0eecddf08a151906414c092fa5926dc", size = 131200, upload-time = "2025-08-06T09:48:13.356Z" }, -] - [[package]] name = "smart-open" version = "7.5.0" @@ -6848,15 +6366,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = 
"2025-11-08T21:38:39.024Z" }, ] -[[package]] -name = "smmap" -version = "5.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, -] - [[package]] name = "sniffio" version = "1.3.1" @@ -7011,15 +6520,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, ] -[[package]] -name = "sqlparse" -version = "0.5.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/76/437d71068094df0726366574cf3432a4ed754217b436eb7429415cf2d480/sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e", size = 120815, upload-time = "2025-12-19T07:17:45.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/4b/359f28a903c13438ef59ebeee215fb25da53066db67b305c125f1c6d2a25/sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba", size = 46138, upload-time = "2025-12-19T07:17:46.573Z" }, -] - [[package]] name = "srsly" version = "2.5.3" @@ -7224,15 +6724,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9f/ef/1648fda54e9689058335ff54f650a7a314db2a42e21af1b83949b2dc748e/thinc-8.3.13-cp314-cp314-win_arm64.whl", hash = "sha256:11754fada9ad5ba2e02d5f3f234f940e24015b82333db58372f4a6aedad9b43f", size = 1667687, upload-time = "2026-03-23T07:22:34.967Z" }, ] -[[package]] -name = "threadpoolctl" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, -] - [[package]] name = "tinycss2" version = "1.4.0" @@ -7765,15 +7256,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258, upload-time = "2026-01-09T18:20:59.425Z" }, ] -[[package]] -name = "waitress" -version = "3.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bf/cb/04ddb054f45faa306a230769e868c28b8065ea196891f09004ebace5b184/waitress-3.0.2.tar.gz", hash = 
"sha256:682aaaf2af0c44ada4abfb70ded36393f0e307f4ab9456a215ce0020baefc31f", size = 179901, upload-time = "2024-11-16T20:02:35.195Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/57/a27182528c90ef38d82b636a11f606b0cbb0e17588ed205435f8affe3368/waitress-3.0.2-py3-none-any.whl", hash = "sha256:c56d67fd6e87c2ee598b76abdd4e96cfad1f24cacdea5078d382b1f9d7b5ed2e", size = 56232, upload-time = "2024-11-16T20:02:33.858Z" }, -] - [[package]] name = "wasabi" version = "1.1.3" From 073908331785b4507431cfbf951e8854a299bbad Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Wed, 6 May 2026 05:11:46 -0700 Subject: [PATCH 07/24] MAINT: remove dead gbda_deterministic param, add AML launcher scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove gbda_deterministic/mpa_deterministic — dead code from GBDA attack that was never consumed by any GCG class. Its presence caused a TypeError in individual mode because MultiPromptAttack.__init__() doesn't accept it. This was a pre-existing bug from the original llm-attacks research repo (silently swallowed by **kwargs there, but our copy removed **kwargs). Also adds scripts/run_gcg_aml.py (launcher with sys.path fix for Azure ML) and scripts/submit_gcg_job.py (job submission reading from .env files). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../configs/individual_llama_2.yaml | 1 - .../configs/individual_llama_3.yaml | 1 - .../configs/individual_mistral.yaml | 1 - .../configs/individual_phi_3_mini.yaml | 1 - .../configs/individual_vicuna.yaml | 1 - .../gcg/experiments/train.py | 4 +- pyrit/auxiliary_attacks/gcg/src/Dockerfile | 2 +- scripts/run_gcg_aml.py | 28 +++++++++ scripts/submit_gcg_job.py | 63 +++++++++++++++++++ 9 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 scripts/run_gcg_aml.py create mode 100644 scripts/submit_gcg_job.py diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml index 841ea4be21..10ef5504c7 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml @@ -28,4 +28,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml index 2fbc77a85c..4eaf9c987a 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml @@ -28,4 +28,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml index a897403d0a..640f4c32f0 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml @@ -28,4 +28,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml index 3f3b466678..e3accd3c2a 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml +++ 
b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml @@ -28,4 +28,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml index 91fe68a563..3fc3537e49 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml @@ -28,4 +28,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py index 7adaace8fc..4adf633d76 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/train.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py @@ -109,7 +109,7 @@ def generate_suffix( topk (int): Number of top candidates to consider. Defaults to 256. temp (int): Temperature for sampling. Defaults to 1. filter_cand (bool): Whether to filter invalid candidates. Defaults to True. - gbda_deterministic (bool): Whether to use deterministic mode. Defaults to True. + gbda_deterministic (bool): Unused, kept for config compatibility. Defaults to True. logfile (str): Path to log file. Defaults to "". random_seed (int): Random seed for reproducibility. Defaults to 42. """ @@ -297,7 +297,6 @@ def _create_attack( test_goals=test_goals, test_targets=test_targets, test_workers=test_workers, - mpa_deterministic=params.gbda_deterministic, mpa_lr=params.learning_rate, mpa_batch_size=params.batch_size, mpa_n_steps=params.n_steps, @@ -312,7 +311,6 @@ def _create_attack( test_goals=getattr(params, "test_goals", []), test_targets=getattr(params, "test_targets", []), test_workers=test_workers, - mpa_deterministic=params.gbda_deterministic, mpa_lr=params.learning_rate, mpa_batch_size=params.batch_size, mpa_n_steps=params.n_steps, diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile index 674d4c4fa1..e820fbaaf5 100644 --- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile +++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile @@ -16,7 +16,7 @@ ENV VIRTUAL_ENV="/opt/venv" WORKDIR /app -# Copy and install PyRIT with GCG extras +# Install PyRIT with GCG extras to get all dependencies COPY pyproject.toml MANIFEST.in README.md LICENSE /app/ COPY pyrit/ /app/pyrit/ RUN uv pip install -e ".[gcg]" && \ diff --git a/scripts/run_gcg_aml.py b/scripts/run_gcg_aml.py new file mode 100644 index 0000000000..5ae481ec60 --- /dev/null +++ b/scripts/run_gcg_aml.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Launcher script for GCG suffix generation on Azure ML. + +Ensures the uploaded code snapshot takes priority over the Docker-installed +package by prepending the working directory to sys.path before importing. 
+ +Usage (Azure ML job command): + python scripts/run_gcg_aml.py --model_name phi_3_mini --setup single \ + --n_train_data 5 --n_test_data 0 --n_steps 5 --batch_size 64 +""" + +import os +import sys + +if __name__ == "__main__": + # Ensure uploaded code takes priority over Docker-installed package + sys.path.insert(0, os.getcwd()) + + # Change to experiments dir so relative config paths work + os.chdir(os.path.join(os.getcwd(), "pyrit", "auxiliary_attacks", "gcg", "experiments")) + + from pyrit.auxiliary_attacks.gcg.experiments.run import _parse_arguments, run_trainer + + args = _parse_arguments() + kwargs = {k: v for k, v in vars(args).items() if v is not None} + run_trainer(**kwargs) diff --git a/scripts/submit_gcg_job.py b/scripts/submit_gcg_job.py new file mode 100644 index 0000000000..77eb6bf639 --- /dev/null +++ b/scripts/submit_gcg_job.py @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Submit a GCG baseline job to Azure ML. + +Reads workspace configuration from PyRIT's .env files +(AZURE_ML_SUBSCRIPTION_ID, AZURE_ML_RESOURCE_GROUP, AZURE_ML_WORKSPACE_NAME). + +Usage: + python scripts/submit_gcg_job.py +""" + +import os +from pathlib import Path + +from azure.ai.ml import MLClient, command +from azure.ai.ml.entities import BuildContext, Environment +from azure.identity import AzureCliCredential + +from pyrit.common.path import HOME_PATH +from pyrit.setup.initialization import _load_environment_files + + +def main() -> None: + _load_environment_files(env_files=None) + + subscription_id = os.environ["AZURE_ML_SUBSCRIPTION_ID"] + resource_group = os.environ["AZURE_ML_RESOURCE_GROUP"] + workspace_name = os.environ["AZURE_ML_WORKSPACE_NAME"] + hf_token = os.environ.get("HUGGINGFACE_TOKEN", "") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + job = command( + code=Path(HOME_PATH), + command=( + "python scripts/run_gcg_aml.py" + " --model_name phi_3_mini" + " --setup single" + " --n_train_data 5" + " --n_test_data 0" + " --n_steps 5" + " --batch_size 64" + ), + inputs={}, + environment="pyrit-gcg:6", + environment_variables={"HUGGINGFACE_TOKEN": hf_token}, + compute="gcg-gpu-a100", + display_name="gcg_baseline", + description="GCG baseline: phi-3-mini, 5 steps, 5 train data", + tags={"Owner": "romanlutz"}, + ) + + returned_job = ml_client.jobs.create_or_update(job) + print(f"Job: {returned_job.name}") + print(f"Status: {returned_job.status}") + print(f"Studio URL: {returned_job.studio_url}") + + +if __name__ == "__main__": + main() From db2a096ade8aea197b6e225be3331d078435d81b Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Wed, 6 May 2026 05:38:53 -0700 Subject: [PATCH 08/24] FIX: remove dead mpa_kwargs passed to MultiPromptAttack.__init__ All mpa_kwargs (deterministic, lr, batch_size, n_steps) were silently absorbed by **kwargs in the original llm-attacks repo's MultiPromptAttack. __init__() but never read. Our copy removed **kwargs, exposing the bug. The original repo even has a typo: 'self.mpa_kewargs' in IndividualPromptAttack (line 1114 of llm-attacks/attack_manager.py). Verified: none of these kwargs are consumed by MultiPromptAttack in either the original repo or our copy. 
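For illustration, a minimal sketch of the failure mode (hypothetical
classes, not the real GCG signatures): a **kwargs catch-all silently
swallows unknown keyword arguments, while an explicit signature raises
as soon as one is passed.

    # llm-attacks style: unknown kwargs are absorbed and never read.
    class WithCatchAll:
        def __init__(self, goals, targets, **kwargs):
            self.goals = goals
            self.targets = targets  # kwargs is bound but never used

    # Our copy removed **kwargs, so the same call now fails loudly.
    class WithoutCatchAll:
        def __init__(self, goals, targets):
            self.goals = goals
            self.targets = targets

    WithCatchAll([], [], deterministic=True)  # silently "works"
    try:
        WithoutCatchAll([], [], deterministic=True)
    except TypeError as e:
        print(e)  # __init__() got an unexpected keyword argument 'deterministic'
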
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py index 79bf568d09..460200d2ef 100644 --- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py +++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py @@ -1111,7 +1111,6 @@ def run( self.test_goals, self.test_targets, self.test_workers, - **self.mpa_kwargs, ) if num_goals == len(self.goals) and num_workers == len(self.workers): stop_inner_on_success = False @@ -1349,7 +1348,6 @@ def run( self.test_goals, self.test_targets, self.test_workers, - **self.mpa_kwargs, ) attack.run( n_steps=n_steps, @@ -1521,7 +1519,6 @@ def run( self.test_prefixes, self.logfile, self.managers, - **self.mpa_kwargs, ) all_inputs = [p.eval_str for p in attack.prompts[0]._prompts] max_new_tokens = [p.test_new_toks for p in attack.prompts[0]._prompts] From e7bee410b2e4dfcc8601dbb88a5af89c36f9f880 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Wed, 6 May 2026 06:40:28 -0700 Subject: [PATCH 09/24] MAINT: switch Azure ML baseline to llama-2 (phi-3 has fastchat bug #965) Phi-3-mini hits 'Conversation has no attribute system' in _update_ids() due to fastchat API change. Llama-2 has dedicated handling path that works. GCG baseline VALIDATED on Azure ML: - Model: meta-llama/Llama-2-7b-chat-hf - Config: 5 prompts, 5 steps, batch_size 64 - Result: loss decreases across steps (1.9 -> 0.86 on best prompt) - Runtime: ~6 min on Standard_NC24ads_A100_v4 - Job: silly_vinegar_82x7td6gpn (Completed) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- scripts/submit_gcg_job.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/submit_gcg_job.py b/scripts/submit_gcg_job.py index 77eb6bf639..662bd5a78e 100644 --- a/scripts/submit_gcg_job.py +++ b/scripts/submit_gcg_job.py @@ -37,7 +37,7 @@ def main() -> None: code=Path(HOME_PATH), command=( "python scripts/run_gcg_aml.py" - " --model_name phi_3_mini" + " --model_name llama_2" " --setup single" " --n_train_data 5" " --n_test_data 0" @@ -48,8 +48,8 @@ def main() -> None: environment="pyrit-gcg:6", environment_variables={"HUGGINGFACE_TOKEN": hf_token}, compute="gcg-gpu-a100", - display_name="gcg_baseline", - description="GCG baseline: phi-3-mini, 5 steps, 5 train data", + display_name="gcg_baseline_llama2", + description="GCG baseline: llama-2, 5 steps, 5 train data", tags={"Owner": "romanlutz"}, ) From 9a02400bf8e97e82bff62c767354f5579c872a3b Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Thu, 7 May 2026 11:21:10 -0700 Subject: [PATCH 10/24] TEST: add wiring test for IndividualPromptAttack -> MultiPromptAttack The existing TestCreateAttack tests mock the manager classes, so they never exercise MultiPromptAttack.__init__() with real kwargs. That's why the dead mpa_kwargs bug only surfaced on Azure (TypeError when MPA didn't accept deterministic / lr / etc). This test constructs the real GCG manager classes and verifies IndividualPromptAttack and ProgressiveMultiPromptAttack can create an internal MultiPromptAttack without error. 
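For context, a minimal sketch (hypothetical names, not the real GCG
classes) of why the mocked tests missed this: a MagicMock accepts any
call signature, so the kwarg mismatch only surfaces when the real
constructor actually runs.

    from unittest.mock import MagicMock

    class RealAttack:
        def __init__(self, goals, targets):  # no **kwargs catch-all
            self.goals = goals
            self.targets = targets

    MagicMock()(goals=[], targets=[], deterministic=True)  # always succeeds
    try:
        RealAttack(goals=[], targets=[], deterministic=True)
    except TypeError as e:
        print(e)  # only the real __init__ raises
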
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gcg/test_attack_wiring.py | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py diff --git a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py new file mode 100644 index 0000000000..7613a4780d --- /dev/null +++ b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py @@ -0,0 +1,177 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Tests that exercise the full attack class wiring without mocking manager classes. + +These tests catch kwarg mismatches between IndividualPromptAttack/ProgressiveMultiPromptAttack +and MultiPromptAttack.__init__(), and template compatibility issues in _update_ids(). +""" + +from unittest.mock import MagicMock, patch + +import pytest + +attack_manager_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.base.attack_manager", + reason="GCG optional dependencies (torch, mlflow, etc.) not installed", +) +torch = pytest.importorskip("torch", reason="torch not installed") + +gcg_attack_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack", + reason="GCG optional dependencies not installed", +) + +IndividualPromptAttack = attack_manager_mod.IndividualPromptAttack +ProgressiveMultiPromptAttack = attack_manager_mod.ProgressiveMultiPromptAttack +MultiPromptAttack = attack_manager_mod.MultiPromptAttack +GCGAttackPrompt = gcg_attack_mod.GCGAttackPrompt +GCGPromptManager = gcg_attack_mod.GCGPromptManager +GCGMultiPromptAttack = gcg_attack_mod.GCGMultiPromptAttack + +train_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.experiments.train", + reason="GCG train module not available", +) +Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator + +MANAGERS = { + "AP": GCGAttackPrompt, + "PM": GCGPromptManager, + "MPA": GCGMultiPromptAttack, +} + + +def _make_mock_worker() -> MagicMock: + """Create a mock worker with required attributes for attack construction.""" + worker = MagicMock() + worker.model.name_or_path = "test-model" + worker.tokenizer.name_or_path = "test-tokenizer" + worker.conv_template.name = "test-template" + return worker + + +class TestAttackClassWiring: + """Tests that verify attack classes can be constructed with real manager classes. + + These catch kwarg mismatches that mocked tests miss. + """ + + def test_individual_attack_creates_mpa_without_error(self) -> None: + """IndividualPromptAttack.run() should create MultiPromptAttack without TypeError. + + This catches the mpa_kwargs bug where dead kwargs (deterministic, lr, etc.) + were passed to MultiPromptAttack.__init__() which didn't accept them. + """ + worker = _make_mock_worker() + + # Create IndividualPromptAttack with the real GCG manager classes + attack = IndividualPromptAttack( + goals=["test goal"], + targets=["test target"], + workers=[worker], + control_init="! ! !", + managers=MANAGERS, + mpa_lr=0.01, + mpa_batch_size=64, + mpa_n_steps=5, + ) + + # The run() method creates MultiPromptAttack internally. + # Patch the MPA's run() to avoid actually running the attack, + # but let __init__ execute with real classes to catch kwarg issues. 
+ with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) + + def test_progressive_attack_creates_mpa_without_error(self) -> None: + """ProgressiveMultiPromptAttack.run() should create MultiPromptAttack without TypeError.""" + worker = _make_mock_worker() + + attack = ProgressiveMultiPromptAttack( + goals=["test goal"], + targets=["test target"], + workers=[worker], + progressive_goals=False, + progressive_models=False, + control_init="! ! !", + managers=MANAGERS, + mpa_lr=0.01, + mpa_batch_size=64, + mpa_n_steps=5, + ) + + with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) + + def test_create_attack_individual_wires_correctly(self) -> None: + """_create_attack with transfer=False should produce an IndividualPromptAttack + that can create internal MPA instances without error.""" + worker = _make_mock_worker() + + params = Generator._build_params( + transfer=False, + control_init="! ! !", + result_prefix="test", + learning_rate=0.01, + batch_size=64, + n_steps=5, + ) + + attack = Generator._create_attack( + params=params, + managers=MANAGERS, + train_goals=["test goal"], + train_targets=["test target"], + test_goals=[], + test_targets=[], + workers=[worker], + test_workers=[], + ) + + assert isinstance(attack, IndividualPromptAttack) + + # Verify internal MPA creation works + with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) From 123243ba02973bd25df8b67c806ef0cacfa57cdf Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Thu, 7 May 2026 11:21:28 -0700 Subject: [PATCH 11/24] TEST: add vicuna integration tests covering non-llama _update_ids path The existing GPT-2 integration tests only use the llama-2 conversation template path. Bugs in the else branch of AttackPrompt._update_ids -- like the Phi-3 conv_template.system AttributeError we hit on Azure -- would never be caught. The two new tests construct GCGAttackPrompt with the vicuna template, which exercises the same code path. They are marked xfail (strict=True) because vicuna's fastchat conversation template lacks a .system attribute, reproducing the same bug. The xfail marker references issue #965 and will flip to 'unexpectedly passed' when the fastchat replacement lands, prompting removal of the marker. 
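For reference, the strict-xfail mechanics this relies on (standard
pytest behavior, sketched with a dummy test rather than the real vicuna
fixture):

    import pytest

    @pytest.mark.xfail(reason="#965", raises=AttributeError, strict=True)
    def test_known_fastchat_bug():
        raise AttributeError("Conversation has no attribute system")

    # Today: the AttributeError matches `raises`, so the test reports XFAIL.
    # Once the fastchat replacement lands and no exception is raised,
    # strict=True turns the unexpected pass into a hard failure, which is
    # the prompt to delete the marker. Any other exception type would also
    # fail, so the marker cannot mask new, unrelated breakage.
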
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/test_gcg_integration.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index d2e32d5974..8474fd0f3d 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -57,6 +57,18 @@ def conv_template(): return conv +@pytest.fixture() +def vicuna_conv_template(): + """Create a fresh vicuna conversation template for each test. + + Vicuna exercises the non-llama branch of `_update_ids` (the path that + references `conv_template.system` and uses `encoding.char_to_token`). + A bug in that branch — like the Phi-3 `.system` AttributeError we hit + on Azure (#965) — would never be caught by llama-2-only tests. + """ + return get_conversation_template("vicuna_v1.1") + + class TestTokenGradientsIntegration: """Integration tests for token_gradients with real GPT-2.""" @@ -214,3 +226,72 @@ def test_get_nonascii_toks_returns_nonempty_tensor(self, gpt2_tokenizer: transfo toks = get_nonascii_toks(gpt2_tokenizer, device="cpu") assert isinstance(toks, torch.Tensor) assert len(toks) > 0 + + +class TestGCGAttackPromptNonLlamaTemplate: + """Integration tests covering the non-llama branch of `AttackPrompt._update_ids`. + + The llama-2/llama-3 path is well-exercised above. The `else` branch contains + distinct logic that touches `conv_template.system`, `char_to_token`, and + different slice arithmetic. A bug here — like the Phi-3 `conv_template.system` + AttributeError we hit on Azure (#965) — would only surface with a + non-llama template, so we exercise it explicitly with vicuna. + + Both tests are currently `xfail` because vicuna (and any other modern + fastchat template that lacks a `.system` attribute) reproduces the same + AttributeError as Phi-3 — a known bug tracked in #965 that PR replacing + fastchat with `tokenizer.apply_chat_template()` will fix. Once that lands, + the xfail will flip to "unexpectedly passed" and the marker can be removed. + """ + + @pytest.mark.xfail( + reason="#965: fastchat templates without `.system` attribute crash _update_ids", + raises=AttributeError, + strict=True, + ) + def test_prompt_initializes_with_vicuna_template( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + vicuna_conv_template: object, + ) -> None: + """GCGAttackPrompt should construct successfully with the vicuna template.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=vicuna_conv_template, + control_init="! ! ! ! 
!", + ) + + assert prompt._control_slice.start < prompt._control_slice.stop + assert prompt._target_slice.start < prompt._target_slice.stop + assert prompt._control_slice.stop <= prompt._target_slice.start + assert prompt.input_ids.shape[0] > 0 + + @pytest.mark.xfail( + reason="#965: fastchat templates without `.system` attribute crash _update_ids", + raises=AttributeError, + strict=True, + ) + def test_grad_returns_valid_gradient_with_vicuna_template( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_tokenizer: transformers.PreTrainedTokenizer, + vicuna_conv_template: object, + ) -> None: + """gradient computation should work end-to-end on the non-llama path.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_tokenizer, + conv_template=vicuna_conv_template, + control_init="! ! ! ! !", + ) + + grad = prompt.grad(gpt2_model) + + n_control = prompt._control_slice.stop - prompt._control_slice.start + assert grad.shape[0] == n_control + assert grad.shape[1] == gpt2_tokenizer.vocab_size + assert torch.isfinite(grad).all() From bd810298ca9803948b144ce60a5cb5fb2748bb48 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Thu, 7 May 2026 11:21:59 -0700 Subject: [PATCH 12/24] TEST: add Azure ML GCG e2e test, update notebook to llama-2 Updates the AML notebook to reflect the actual flow we ran during Phase 1c baseline validation: llama-2 baseline (phi-3 has fastchat #965 bug), run_gcg_aml.py launcher script (so the uploaded code snapshot wins over the Docker-installed package), repo-root build context (Dockerfile needs to COPY pyproject.toml + pyrit/ for pip install -e .[gcg]), and PyRIT-style env file loading via _load_environment_files. Adds tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py mirroring that same flow as a real e2e test. Submits a small (5-step, 5-train, batch 64) llama-2 GCG job, polls until terminal state, asserts Completed. Skipped unless RUN_ALL_TESTS=true and AZURE_ML_* + HUGGINGFACE_TOKEN env vars are set (since it submits real paid Azure ML compute). Always cancels the submitted job on test failure or interruption to avoid leaking compute. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/1_gcg_azure_ml.ipynb | 145 ++++++---------- doc/code/auxiliary_attacks/1_gcg_azure_ml.py | 77 +++++---- .../end_to_end/auxiliary_attacks/__init__.py | 4 + .../auxiliary_attacks/test_gcg_aml_e2e.py | 157 ++++++++++++++++++ 4 files changed, 258 insertions(+), 125 deletions(-) create mode 100644 tests/end_to_end/auxiliary_attacks/__init__.py create mode 100644 tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb index 416b27353e..c043f36a82 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb @@ -42,21 +42,16 @@ "execution_count": null, "id": "4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "romanlutz\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "\n", - "# Enter details of your AML workspace\n", - "subscription_id = os.environ.get(\"AZURE_SUBSCRIPTION_ID\")\n", - "resource_group = os.environ.get(\"AZURE_RESOURCE_GROUP\")\n", + "from pyrit.setup.initialization import _load_environment_files\n", + "\n", + "_load_environment_files(env_files=None)\n", + "\n", + "subscription_id = os.environ.get(\"AZURE_ML_SUBSCRIPTION_ID\")\n", + "resource_group = os.environ.get(\"AZURE_ML_RESOURCE_GROUP\")\n", "workspace = os.environ.get(\"AZURE_ML_WORKSPACE_NAME\")\n", "print(workspace)" ] @@ -71,8 +66,6 @@ "from azure.ai.ml import MLClient\n", "from azure.identity import AzureCliCredential\n", "\n", - "# Get a handle to the workspace\n", - "# For some people DefaultAzureCredential may work better than AzureCliCredential.\n", "ml_client = MLClient(AzureCliCredential(), subscription_id, resource_group, workspace)" ] }, @@ -87,47 +80,38 @@ { "cell_type": "markdown", "id": "7", - "metadata": { - "lines_to_next_cell": 0 - }, + "metadata": {}, "source": [ - "To install the dependencies needed to run GCG, we create an AML environment from a [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile)." + "To install the dependencies needed to run GCG, we create an AML environment from a\n", + "[Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). The Dockerfile uses\n", + "an NVIDIA CUDA base image with Python 3.11 and installs PyRIT with the `gcg` extra." 
] }, { "cell_type": "code", "execution_count": null, "id": "8", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit', 'description': 'PyRIT environment created from a Docker context.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/romanlutz/providers/Microsoft.MachineLearningServices/workspaces/romanlutz/environments/pyrit/versions/5', 'Resource__source_path': '', 'base_path': './git/PyRIT/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '5', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", - "from azure.ai.ml.entities import BuildContext, Environment, JobResourceConfiguration\n", + "from azure.ai.ml.entities import BuildContext, Environment\n", "\n", "from pyrit.common.path import HOME_PATH\n", "\n", - "# Configure the AML environment with path to Dockerfile and dependencies\n", + "# Configure the AML environment — build context is the repo root so the Dockerfile\n", + "# can COPY pyproject.toml and pyrit/ for pip install -e \".[gcg]\"\n", "env_docker_context = Environment(\n", - " build=BuildContext(path=Path(HOME_PATH) / \"pyrit\" / \"auxiliary_attacks\" / \"gcg\" / \"src\"),\n", - " name=\"pyrit\",\n", - " description=\"PyRIT environment created from a Docker context.\",\n", + " build=BuildContext(\n", + " path=Path(HOME_PATH),\n", + " dockerfile_path=\"pyrit/auxiliary_attacks/gcg/src/Dockerfile\",\n", + " ),\n", + " name=\"pyrit-gcg\",\n", + " description=\"PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]\",\n", + " tags={\"Owner\": os.environ.get(\"USER\", \"unknown\")},\n", ")\n", "\n", - "# Create or update the AML environment\n", "ml_client.environments.create_or_update(env_docker_context)" ] }, @@ -144,9 +128,15 @@ "id": "10", "metadata": {}, "source": [ - "Finally, we configure the command to run the GCG algorithm. The entry file for the algorithm is [`run.py`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), which takes several command line arguments, as shown below. We also have to specify the compute `instance_type` to run the algorithm on. In our experience, a GPU instance with at least 32GB of vRAM is required. In the example below, we use Standard_NC96ads_A100_v4.\n", + "Finally, we configure the command to run the GCG algorithm. We use a launcher script\n", + "(`scripts/run_gcg_aml.py`) that ensures the uploaded code snapshot takes priority over\n", + "the Docker-installed package.\n", + "\n", + "We also have to specify a GPU compute target. In our experience, a GPU instance with\n", + "at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4).\n", "\n", - "Depending on the compute instance you use, you may encounter \"out of memory\" errors. In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`." 
+ "Depending on the compute instance you use, you may encounter \"out of memory\" errors.\n", + "In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`." ] }, { @@ -158,26 +148,24 @@ "source": [ "from azure.ai.ml import command\n", "\n", - "# Configure the command\n", "job = command(\n", " code=Path(HOME_PATH),\n", - " command=\"cd pyrit/auxiliary_attacks/gcg/experiments && python run.py --model_name ${{inputs.model_name}} --setup ${{inputs.setup}} --n_train_data ${{inputs.n_train_data}} --n_test_data ${{inputs.n_test_data}} --n_steps ${{inputs.n_steps}} --batch_size ${{inputs.batch_size}}\",\n", - " inputs={\n", - " \"model_name\": \"phi_3_mini\",\n", - " \"setup\": \"multiple\",\n", - " \"n_train_data\": 25,\n", - " \"n_test_data\": 0,\n", - " \"n_steps\": 500,\n", - " \"batch_size\": 256,\n", - " },\n", + " command=(\n", + " \"python scripts/run_gcg_aml.py\"\n", + " \" --model_name llama_2\"\n", + " \" --setup single\"\n", + " \" --n_train_data 5\"\n", + " \" --n_test_data 0\"\n", + " \" --n_steps 5\"\n", + " \" --batch_size 64\"\n", + " ),\n", + " inputs={},\n", " environment=f\"{env_docker_context.name}:{env_docker_context.version}\",\n", " environment_variables={\"HUGGINGFACE_TOKEN\": os.environ[\"HUGGINGFACE_TOKEN\"]},\n", - " display_name=\"suffix_generation\",\n", - " description=\"Generate a suffix for attacking LLMs.\",\n", - " resources=JobResourceConfiguration(\n", - " instance_type=\"Standard_NC96ads_A100_v4\",\n", - " instance_count=1,\n", - " ),\n", + " compute=\"gcg-gpu-a100\",\n", + " display_name=\"gcg_suffix_generation\",\n", + " description=\"Generate adversarial suffixes using GCG on Llama-2.\",\n", + " tags={\"Owner\": os.environ.get(\"USER\", \"unknown\")},\n", ")" ] }, @@ -186,48 +174,19 @@ "execution_count": null, "id": "12", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Your file exceeds 100 MB. 
If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.\n", - "\n", - "Example: azcopy copy './git/PyRIT' 'https://romanlutz0437468309.blob.core.windows.net/3f52e8b9-0bac-4c48-9e4a-a92e85a582c4-10s61nn9uso4b2p89xjypawyc7/PyRIT' \n", - "\n", - "See https://learn.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.\n", - "\u001b[39m\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "# Submit the command\n", - "returned_job = ml_client.create_or_update(job)" + "returned_job = ml_client.create_or_update(job)\n", + "print(f\"Job: {returned_job.name}\")\n", + "print(f\"Status: {returned_job.status}\")\n", + "print(f\"Studio URL: {returned_job.studio_url}\")" ] } ], "metadata": { "jupytext": { - "cell_metadata_filter": "-all" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.2" + "cell_metadata_filter": "-all", + "main_language": "python" } }, "nbformat": 4, diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py index db324926bf..98a40e350a 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py @@ -29,9 +29,12 @@ # %% import os -# Enter details of your AML workspace -subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID") -resource_group = os.environ.get("AZURE_RESOURCE_GROUP") +from pyrit.setup.initialization import _load_environment_files + +_load_environment_files(env_files=None) + +subscription_id = os.environ.get("AZURE_ML_SUBSCRIPTION_ID") +resource_group = os.environ.get("AZURE_ML_RESOURCE_GROUP") workspace = os.environ.get("AZURE_ML_WORKSPACE_NAME") print(workspace) @@ -39,66 +42,76 @@ from azure.ai.ml import MLClient from azure.identity import AzureCliCredential -# Get a handle to the workspace -# For some people DefaultAzureCredential may work better than AzureCliCredential. ml_client = MLClient(AzureCliCredential(), subscription_id, resource_group, workspace) # %% [markdown] # ## Create AML Environment # %% [markdown] -# To install the dependencies needed to run GCG, we create an AML environment from a [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). +# To install the dependencies needed to run GCG, we create an AML environment from a +# [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). The Dockerfile uses +# an NVIDIA CUDA base image with Python 3.11 and installs PyRIT with the `gcg` extra. 
+ # %% from pathlib import Path -from azure.ai.ml.entities import BuildContext, Environment, JobResourceConfiguration +from azure.ai.ml.entities import BuildContext, Environment from pyrit.common.path import HOME_PATH -# Configure the AML environment with path to Dockerfile and dependencies +# Configure the AML environment — build context is the repo root so the Dockerfile +# can COPY pyproject.toml and pyrit/ for pip install -e ".[gcg]" env_docker_context = Environment( - build=BuildContext(path=Path(HOME_PATH) / "pyrit" / "auxiliary_attacks" / "gcg" / "src"), - name="pyrit", - description="PyRIT environment created from a Docker context.", + build=BuildContext( + path=Path(HOME_PATH), + dockerfile_path="pyrit/auxiliary_attacks/gcg/src/Dockerfile", + ), + name="pyrit-gcg", + description="PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]", + tags={"Owner": os.environ.get("USER", "unknown")}, ) -# Create or update the AML environment ml_client.environments.create_or_update(env_docker_context) - # %% [markdown] # ## Submit Training Job to AML # %% [markdown] -# Finally, we configure the command to run the GCG algorithm. The entry file for the algorithm is [`run.py`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), which takes several command line arguments, as shown below. We also have to specify the compute `instance_type` to run the algorithm on. In our experience, a GPU instance with at least 32GB of vRAM is required. In the example below, we use Standard_NC96ads_A100_v4. +# Finally, we configure the command to run the GCG algorithm. We use a launcher script +# (`scripts/run_gcg_aml.py`) that ensures the uploaded code snapshot takes priority over +# the Docker-installed package. +# +# We also have to specify a GPU compute target. In our experience, a GPU instance with +# at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4). # -# Depending on the compute instance you use, you may encounter "out of memory" errors. In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`. +# Depending on the compute instance you use, you may encounter "out of memory" errors. +# In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`. 
# %% from azure.ai.ml import command -# Configure the command job = command( code=Path(HOME_PATH), - command="cd pyrit/auxiliary_attacks/gcg/experiments && python run.py --model_name ${{inputs.model_name}} --setup ${{inputs.setup}} --n_train_data ${{inputs.n_train_data}} --n_test_data ${{inputs.n_test_data}} --n_steps ${{inputs.n_steps}} --batch_size ${{inputs.batch_size}}", - inputs={ - "model_name": "phi_3_mini", - "setup": "multiple", - "n_train_data": 25, - "n_test_data": 0, - "n_steps": 500, - "batch_size": 256, - }, + command=( + "python scripts/run_gcg_aml.py" + " --model_name llama_2" + " --setup single" + " --n_train_data 5" + " --n_test_data 0" + " --n_steps 5" + " --batch_size 64" + ), + inputs={}, environment=f"{env_docker_context.name}:{env_docker_context.version}", environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]}, - display_name="suffix_generation", - description="Generate a suffix for attacking LLMs.", - resources=JobResourceConfiguration( - instance_type="Standard_NC96ads_A100_v4", - instance_count=1, - ), + compute="gcg-gpu-a100", + display_name="gcg_suffix_generation", + description="Generate adversarial suffixes using GCG on Llama-2.", + tags={"Owner": os.environ.get("USER", "unknown")}, ) # %% -# Submit the command returned_job = ml_client.create_or_update(job) +print(f"Job: {returned_job.name}") +print(f"Status: {returned_job.status}") +print(f"Studio URL: {returned_job.studio_url}") diff --git a/tests/end_to_end/auxiliary_attacks/__init__.py b/tests/end_to_end/auxiliary_attacks/__init__.py new file mode 100644 index 0000000000..acd459977f --- /dev/null +++ b/tests/end_to_end/auxiliary_attacks/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""End-to-end tests for PyRIT auxiliary attacks (GCG).""" diff --git a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py new file mode 100644 index 0000000000..1499b216dd --- /dev/null +++ b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py @@ -0,0 +1,157 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""End-to-end test for the GCG Azure ML pipeline. + +Mirrors the flow in `doc/code/auxiliary_attacks/1_gcg_azure_ml.py`: + 1. Connect to the AML workspace + 2. Build (or reuse) the GCG Docker environment + 3. Submit a small llama-2 GCG job (5 steps, 5 train data) + 4. Poll until the job reaches a terminal state + 5. Assert the job completed successfully + +Skipped unless `RUN_ALL_TESTS=true`. Per-test skips also apply when the +required Azure ML or HuggingFace credentials are missing, since this test +submits a real (paid) compute job. On test failure or interruption, the +submitted job is cancelled so it does not continue burning compute. + +Required environment variables when `RUN_ALL_TESTS=true`: + - AZURE_ML_SUBSCRIPTION_ID + - AZURE_ML_RESOURCE_GROUP + - AZURE_ML_WORKSPACE_NAME + - HUGGINGFACE_TOKEN (must have access to meta-llama/Llama-2-7b-chat-hf) + +Optional: + - AZURE_ML_GCG_COMPUTE (defaults to "gcg-gpu-a100") + - GCG_E2E_MAX_WAIT_SECONDS (defaults to 5400 — 90 minutes) +""" + +import contextlib +import os +import time +from pathlib import Path + +import pytest + +# Skip the entire module unless RUN_ALL_TESTS=true; this test submits real +# paid Azure ML compute so it should never run in default CI. 
+pytestmark = pytest.mark.skipif( + os.getenv("RUN_ALL_TESTS", "").lower() != "true", + reason="RUN_ALL_TESTS is not set to true", +) + +# Heavy imports deferred until skip check passes +azure_ai_ml = pytest.importorskip("azure.ai.ml", reason="azure-ai-ml not installed") +pytest.importorskip("azure.identity", reason="azure-identity not installed") + +from azure.ai.ml import MLClient, command # noqa: E402 +from azure.ai.ml.entities import BuildContext, Environment # noqa: E402 +from azure.identity import AzureCliCredential # noqa: E402 + +from pyrit.common.path import HOME_PATH # noqa: E402 +from pyrit.setup.initialization import _load_environment_files # noqa: E402 + +_REQUIRED_ENV_VARS = ( + "AZURE_ML_SUBSCRIPTION_ID", + "AZURE_ML_RESOURCE_GROUP", + "AZURE_ML_WORKSPACE_NAME", + "HUGGINGFACE_TOKEN", +) +_DEFAULT_COMPUTE = "gcg-gpu-a100" +_DEFAULT_MAX_WAIT_SECONDS = 5400 # 90 minutes +_POLL_INTERVAL_SECONDS = 30 +_TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"} + + +@pytest.fixture(scope="module") +def ml_client() -> MLClient: + """Build an MLClient from the standard PyRIT env vars; skip if any are missing.""" + _load_environment_files(env_files=None, silent=True) + + missing = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)] + if missing: + pytest.skip(f"Missing required env vars for GCG AML e2e test: {', '.join(missing)}") + + return MLClient( + AzureCliCredential(), + os.environ["AZURE_ML_SUBSCRIPTION_ID"], + os.environ["AZURE_ML_RESOURCE_GROUP"], + os.environ["AZURE_ML_WORKSPACE_NAME"], + ) + + +@pytest.fixture(scope="module") +def gcg_environment(ml_client: MLClient) -> Environment: + """Create or reuse the GCG Docker environment in the workspace.""" + env_def = Environment( + build=BuildContext( + path=Path(HOME_PATH), + dockerfile_path="pyrit/auxiliary_attacks/gcg/src/Dockerfile", + ), + name="pyrit-gcg", + description="PyRIT GCG environment (e2e test)", + ) + return ml_client.environments.create_or_update(env_def) + + +@pytest.mark.timeout(_DEFAULT_MAX_WAIT_SECONDS + 600) +def test_gcg_aml_baseline_job_completes(ml_client: MLClient, gcg_environment: Environment) -> None: + """Submit a tiny GCG job on llama-2, wait for completion, assert success. + + The job runs only 5 optimization steps over 5 train prompts so it finishes + in roughly 15-25 minutes (most of that time is GPU spin-up + model + download). It validates that the entire pipeline — code upload, env + build, GPU compute, model loading, attack loop — works end-to-end. 
+ """ + compute = os.environ.get("AZURE_ML_GCG_COMPUTE", _DEFAULT_COMPUTE) + max_wait = int(os.environ.get("GCG_E2E_MAX_WAIT_SECONDS", _DEFAULT_MAX_WAIT_SECONDS)) + + job_def = command( + code=Path(HOME_PATH), + command=( + "python scripts/run_gcg_aml.py" + " --model_name llama_2" + " --setup single" + " --n_train_data 5" + " --n_test_data 0" + " --n_steps 5" + " --batch_size 64" + ), + inputs={}, + environment=f"{gcg_environment.name}:{gcg_environment.version}", + environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]}, + compute=compute, + display_name="gcg_e2e_baseline", + description="E2E test: GCG baseline on Llama-2, 5 steps.", + ) + + submitted_job = ml_client.jobs.create_or_update(job_def) + job_name = submitted_job.name + + final_status: str | None = None + try: + deadline = time.monotonic() + max_wait + while time.monotonic() < deadline: + current = ml_client.jobs.get(job_name) + status = current.status + if status in _TERMINAL_STATES: + final_status = status + break + time.sleep(_POLL_INTERVAL_SECONDS) + else: + pytest.fail( + f"GCG job '{job_name}' did not reach a terminal state within " + f"{max_wait}s (last status: {status!r}). Studio URL: {submitted_job.studio_url}" + ) + + assert final_status == "Completed", ( + f"GCG job '{job_name}' finished with status {final_status!r}, expected 'Completed'. " + f"Studio URL: {submitted_job.studio_url}" + ) + finally: + # Always try to cancel a non-terminal job so we never leak paid compute + # (e.g., if pytest is interrupted or the assertion fires before a + # terminal state is reached). + if final_status is None or final_status not in _TERMINAL_STATES: + with contextlib.suppress(Exception): + ml_client.jobs.begin_cancel(job_name) From 004333906173ea61ac501ff1c587b365dc9543af Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Thu, 7 May 2026 11:47:47 -0700 Subject: [PATCH 13/24] MAINT: invoke GCG runner via 'python -m', drop scripts/ launchers The scripts/ directory is not packaged for PyPI installs, so the AML launcher there was inaccessible to anyone who pip-installed pyrit[gcg]. Move the entry-point cwd handling into pyrit/auxiliary_attacks/gcg/ experiments/run.py itself: when run as `__main__`, chdir into the file's own directory so the relative `configs/` and `results/` paths resolve regardless of where the script is invoked from. AML jobs (notebook and e2e test) now run python -m pyrit.auxiliary_attacks.gcg.experiments.run --model_name ... which also makes the previous sys.path hack unnecessary -- `python -m` puts cwd at the front of sys.path, so the uploaded code snapshot still wins over the Docker-installed package. Deletes scripts/run_gcg_aml.py and scripts/submit_gcg_job.py (the latter was a CLI duplicate of the notebook's submission flow). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/1_gcg_azure_ml.ipynb | 9 +-- doc/code/auxiliary_attacks/1_gcg_azure_ml.py | 9 +-- .../auxiliary_attacks/gcg/experiments/run.py | 6 ++ scripts/run_gcg_aml.py | 28 --------- scripts/submit_gcg_job.py | 63 ------------------- .../auxiliary_attacks/test_gcg_aml_e2e.py | 2 +- 6 files changed, 17 insertions(+), 100 deletions(-) delete mode 100644 scripts/run_gcg_aml.py delete mode 100644 scripts/submit_gcg_job.py diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb index c043f36a82..48a6029161 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb @@ -128,9 +128,10 @@ "id": "10", "metadata": {}, "source": [ - "Finally, we configure the command to run the GCG algorithm. We use a launcher script\n", - "(`scripts/run_gcg_aml.py`) that ensures the uploaded code snapshot takes priority over\n", - "the Docker-installed package.\n", + "Finally, we configure the command to run the GCG algorithm. The entry point is\n", + "[`pyrit.auxiliary_attacks.gcg.experiments.run`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py),\n", + "invoked as a module so the uploaded code snapshot takes priority over the\n", + "Docker-installed package (Python's `-m` flag puts the cwd at the front of `sys.path`).\n", "\n", "We also have to specify a GPU compute target. In our experience, a GPU instance with\n", "at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4).\n", @@ -151,7 +152,7 @@ "job = command(\n", " code=Path(HOME_PATH),\n", " command=(\n", - " \"python scripts/run_gcg_aml.py\"\n", + " \"python -m pyrit.auxiliary_attacks.gcg.experiments.run\"\n", " \" --model_name llama_2\"\n", " \" --setup single\"\n", " \" --n_train_data 5\"\n", diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py index 98a40e350a..6226bd93d9 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py @@ -77,9 +77,10 @@ # ## Submit Training Job to AML # %% [markdown] -# Finally, we configure the command to run the GCG algorithm. We use a launcher script -# (`scripts/run_gcg_aml.py`) that ensures the uploaded code snapshot takes priority over -# the Docker-installed package. +# Finally, we configure the command to run the GCG algorithm. The entry point is +# [`pyrit.auxiliary_attacks.gcg.experiments.run`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), +# invoked as a module so the uploaded code snapshot takes priority over the +# Docker-installed package (Python's `-m` flag puts the cwd at the front of `sys.path`). # # We also have to specify a GPU compute target. In our experience, a GPU instance with # at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4). 
@@ -93,7 +94,7 @@ job = command( code=Path(HOME_PATH), command=( - "python scripts/run_gcg_aml.py" + "python -m pyrit.auxiliary_attacks.gcg.experiments.run" " --model_name llama_2" " --setup single" " --n_train_data 5" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/run.py b/pyrit/auxiliary_attacks/gcg/experiments/run.py index 342db0a67a..93c5d8d388 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/run.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/run.py @@ -3,6 +3,7 @@ import argparse import os +from pathlib import Path from typing import Any, Union import yaml @@ -105,6 +106,11 @@ def _parse_arguments() -> argparse.Namespace: if __name__ == "__main__": + # Resolve relative paths (configs/, results/) against this file's directory + # so the script works regardless of where it is invoked from -- including + # `python -m pyrit.auxiliary_attacks.gcg.experiments.run` from any cwd. + os.chdir(Path(__file__).resolve().parent) + args = _parse_arguments() run_trainer( model_name=args.model_name, diff --git a/scripts/run_gcg_aml.py b/scripts/run_gcg_aml.py deleted file mode 100644 index 5ae481ec60..0000000000 --- a/scripts/run_gcg_aml.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -"""Launcher script for GCG suffix generation on Azure ML. - -Ensures the uploaded code snapshot takes priority over the Docker-installed -package by prepending the working directory to sys.path before importing. - -Usage (Azure ML job command): - python scripts/run_gcg_aml.py --model_name phi_3_mini --setup single \ - --n_train_data 5 --n_test_data 0 --n_steps 5 --batch_size 64 -""" - -import os -import sys - -if __name__ == "__main__": - # Ensure uploaded code takes priority over Docker-installed package - sys.path.insert(0, os.getcwd()) - - # Change to experiments dir so relative config paths work - os.chdir(os.path.join(os.getcwd(), "pyrit", "auxiliary_attacks", "gcg", "experiments")) - - from pyrit.auxiliary_attacks.gcg.experiments.run import _parse_arguments, run_trainer - - args = _parse_arguments() - kwargs = {k: v for k, v in vars(args).items() if v is not None} - run_trainer(**kwargs) diff --git a/scripts/submit_gcg_job.py b/scripts/submit_gcg_job.py deleted file mode 100644 index 662bd5a78e..0000000000 --- a/scripts/submit_gcg_job.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -"""Submit a GCG baseline job to Azure ML. - -Reads workspace configuration from PyRIT's .env files -(AZURE_ML_SUBSCRIPTION_ID, AZURE_ML_RESOURCE_GROUP, AZURE_ML_WORKSPACE_NAME). 
- -Usage: - python scripts/submit_gcg_job.py -""" - -import os -from pathlib import Path - -from azure.ai.ml import MLClient, command -from azure.ai.ml.entities import BuildContext, Environment -from azure.identity import AzureCliCredential - -from pyrit.common.path import HOME_PATH -from pyrit.setup.initialization import _load_environment_files - - -def main() -> None: - _load_environment_files(env_files=None) - - subscription_id = os.environ["AZURE_ML_SUBSCRIPTION_ID"] - resource_group = os.environ["AZURE_ML_RESOURCE_GROUP"] - workspace_name = os.environ["AZURE_ML_WORKSPACE_NAME"] - hf_token = os.environ.get("HUGGINGFACE_TOKEN", "") - - ml_client = MLClient( - AzureCliCredential(), subscription_id, resource_group, workspace_name - ) - - job = command( - code=Path(HOME_PATH), - command=( - "python scripts/run_gcg_aml.py" - " --model_name llama_2" - " --setup single" - " --n_train_data 5" - " --n_test_data 0" - " --n_steps 5" - " --batch_size 64" - ), - inputs={}, - environment="pyrit-gcg:6", - environment_variables={"HUGGINGFACE_TOKEN": hf_token}, - compute="gcg-gpu-a100", - display_name="gcg_baseline_llama2", - description="GCG baseline: llama-2, 5 steps, 5 train data", - tags={"Owner": "romanlutz"}, - ) - - returned_job = ml_client.jobs.create_or_update(job) - print(f"Job: {returned_job.name}") - print(f"Status: {returned_job.status}") - print(f"Studio URL: {returned_job.studio_url}") - - -if __name__ == "__main__": - main() diff --git a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py index 1499b216dd..36e21d85c3 100644 --- a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py +++ b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py @@ -109,7 +109,7 @@ def test_gcg_aml_baseline_job_completes(ml_client: MLClient, gcg_environment: En job_def = command( code=Path(HOME_PATH), command=( - "python scripts/run_gcg_aml.py" + "python -m pyrit.auxiliary_attacks.gcg.experiments.run" " --model_name llama_2" " --setup single" " --n_train_data 5" From 52e7c5d1c02a9d7f92f7eb0cf7e5578df8269f00 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 8 May 2026 10:38:10 -0700 Subject: [PATCH 14/24] MAINT: scope pyarrow 3.14 pin to the gcg extra The pyarrow>=22.0.0; python_version >= '3.14' pin was added in c98af28 to the core dependencies, but pyrit core does not actually need it -- without the gcg extra, the resolver picks a 3.14-compatible pyarrow on its own via the transitive datasets -> pyarrow chain. The pin is only needed when the gcg extra is installed because something in that extra constrains the resolution toward an older pyarrow that lacks cp314 wheels and fails to build from source on Python 3.14. Moves the pin to the gcg extra and adds an inline comment explaining why it is there, matching the existing precedent for the spacy cp314 wheel comment in the all extra. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyproject.toml | 5 ++++- uv.lock | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c35b562ae1..639d6f9430 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,6 @@ dependencies = [ "confusable-homoglyphs>=3.3.1", "ecoji>=0.1.1", "datasets>=3.6.0", - "pyarrow>=22.0.0; python_version >= '3.14'", "fastapi>=0.115.0", "httpx[http2]>=0.27.2", "jinja2>=3.1.6", @@ -115,6 +114,10 @@ gcg = [ "accelerate>=1.7.0", "azure-ai-ml>=1.27.1", "ml-collections>=1.1.0", + # pyarrow is a transitive dep of `datasets`. 
With the gcg extra installed, + # the resolver picks a pyarrow version that lacks cp314 wheels and fails + # to build from source on Python 3.14; pin to a version that ships them. + "pyarrow>=22.0.0; python_version >= '3.14'", "sentencepiece>=0.2.0", "torch>=2.7.0", ] diff --git a/uv.lock b/uv.lock index 1bb0f8092f..f3dfa3997f 100644 --- a/uv.lock +++ b/uv.lock @@ -5185,7 +5185,6 @@ dependencies = [ { name = "openai" }, { name = "openpyxl" }, { name = "pillow" }, - { name = "pyarrow", version = "24.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "pydantic" }, { name = "pyjwt", extra = ["crypto"] }, { name = "pyodbc" }, @@ -5232,6 +5231,7 @@ gcg = [ { name = "accelerate" }, { name = "azure-ai-ml" }, { name = "ml-collections" }, + { name = "pyarrow", version = "24.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "sentencepiece" }, { name = "torch" }, ] @@ -5329,7 +5329,7 @@ requires-dist = [ { name = "pillow", specifier = ">=12.2.0" }, { name = "playwright", marker = "extra == 'all'", specifier = ">=1.49.0" }, { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.49.0" }, - { name = "pyarrow", marker = "python_full_version >= '3.14'", specifier = ">=22.0.0" }, + { name = "pyarrow", marker = "python_full_version >= '3.14' and extra == 'gcg'", specifier = ">=22.0.0" }, { name = "pydantic", specifier = ">=2.11.5" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.8.0" }, { name = "pyodbc", specifier = ">=5.1.0" }, From 9aa1ca16d39f73e6d6d50689656a4ea1cfff9825 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 8 May 2026 10:47:30 -0700 Subject: [PATCH 15/24] TEST: run the AML notebook itself instead of duplicating its logic The AML e2e test previously rebuilt the MLClient, environment, and command from scratch -- a near-copy of the notebook's submission flow. Replace that with `runpy.run_path()` of `doc/code/auxiliary_attacks/1_gcg_azure_ml.py`. The notebook is jupytext percent format (the `# %%` markers are plain comments) so the file is valid Python and runs as a script. The test then pulls `returned_job` and `ml_client` out of the executed namespace and polls the job to a terminal state. Result: the notebook is the single source of truth for the submission flow, and the test verifies that what we ask users to run actually works end-to-end. Net diff is -27 lines. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/test_gcg_aml_e2e.py | 108 +++++------------- 1 file changed, 31 insertions(+), 77 deletions(-) diff --git a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py index 36e21d85c3..bf761f9ee8 100644 --- a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py +++ b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py @@ -3,17 +3,19 @@ """End-to-end test for the GCG Azure ML pipeline. -Mirrors the flow in `doc/code/auxiliary_attacks/1_gcg_azure_ml.py`: - 1. Connect to the AML workspace - 2. Build (or reuse) the GCG Docker environment - 3. Submit a small llama-2 GCG job (5 steps, 5 train data) - 4. Poll until the job reaches a terminal state - 5. Assert the job completed successfully - -Skipped unless `RUN_ALL_TESTS=true`. Per-test skips also apply when the -required Azure ML or HuggingFace credentials are missing, since this test -submits a real (paid) compute job. 
On test failure or interruption, the -submitted job is cancelled so it does not continue burning compute. +Executes `doc/code/auxiliary_attacks/1_gcg_azure_ml.py` directly as a Python +script (the jupytext percent-format `# %%` markers are plain comments, so the +file is valid Python). After the notebook submits the AML job, this test +polls until the job reaches a terminal state and asserts success. + +Running the notebook itself keeps the submission logic in one place: the +tutorial people read is the same code we test. Anything the user can do +manually with the notebook, this test verifies works end-to-end. + +Skipped unless `RUN_ALL_TESTS=true`. Per-test skip also applies when the +required Azure ML or HuggingFace credentials are missing, since this submits +real (paid) compute. On test failure or interruption, the submitted job is +cancelled so it does not continue burning compute. Required environment variables when `RUN_ALL_TESTS=true`: - AZURE_ML_SUBSCRIPTION_ID @@ -22,12 +24,12 @@ - HUGGINGFACE_TOKEN (must have access to meta-llama/Llama-2-7b-chat-hf) Optional: - - AZURE_ML_GCG_COMPUTE (defaults to "gcg-gpu-a100") - - GCG_E2E_MAX_WAIT_SECONDS (defaults to 5400 — 90 minutes) + - GCG_E2E_MAX_WAIT_SECONDS (defaults to 5400 -- 90 minutes) """ import contextlib import os +import runpy import time from pathlib import Path @@ -41,13 +43,9 @@ ) # Heavy imports deferred until skip check passes -azure_ai_ml = pytest.importorskip("azure.ai.ml", reason="azure-ai-ml not installed") +pytest.importorskip("azure.ai.ml", reason="azure-ai-ml not installed") pytest.importorskip("azure.identity", reason="azure-identity not installed") -from azure.ai.ml import MLClient, command # noqa: E402 -from azure.ai.ml.entities import BuildContext, Environment # noqa: E402 -from azure.identity import AzureCliCredential # noqa: E402 - from pyrit.common.path import HOME_PATH # noqa: E402 from pyrit.setup.initialization import _load_environment_files # noqa: E402 @@ -57,83 +55,39 @@ "AZURE_ML_WORKSPACE_NAME", "HUGGINGFACE_TOKEN", ) -_DEFAULT_COMPUTE = "gcg-gpu-a100" +_NOTEBOOK_PATH = Path(HOME_PATH) / "doc" / "code" / "auxiliary_attacks" / "1_gcg_azure_ml.py" _DEFAULT_MAX_WAIT_SECONDS = 5400 # 90 minutes _POLL_INTERVAL_SECONDS = 30 _TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"} -@pytest.fixture(scope="module") -def ml_client() -> MLClient: - """Build an MLClient from the standard PyRIT env vars; skip if any are missing.""" - _load_environment_files(env_files=None, silent=True) +@pytest.mark.timeout(_DEFAULT_MAX_WAIT_SECONDS + 600) +def test_gcg_aml_notebook_runs_to_completion() -> None: + """Execute the AML notebook end-to-end and verify the submitted job completes. + The notebook is the single source of truth for how a GCG job is submitted + to Azure ML. This test loads it via runpy, extracts the submitted job + + MLClient from its namespace, then polls until the job reaches a terminal + state and asserts ``Completed``. 
+ """ + _load_environment_files(env_files=None, silent=True) missing = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)] if missing: pytest.skip(f"Missing required env vars for GCG AML e2e test: {', '.join(missing)}") - return MLClient( - AzureCliCredential(), - os.environ["AZURE_ML_SUBSCRIPTION_ID"], - os.environ["AZURE_ML_RESOURCE_GROUP"], - os.environ["AZURE_ML_WORKSPACE_NAME"], - ) - - -@pytest.fixture(scope="module") -def gcg_environment(ml_client: MLClient) -> Environment: - """Create or reuse the GCG Docker environment in the workspace.""" - env_def = Environment( - build=BuildContext( - path=Path(HOME_PATH), - dockerfile_path="pyrit/auxiliary_attacks/gcg/src/Dockerfile", - ), - name="pyrit-gcg", - description="PyRIT GCG environment (e2e test)", - ) - return ml_client.environments.create_or_update(env_def) - - -@pytest.mark.timeout(_DEFAULT_MAX_WAIT_SECONDS + 600) -def test_gcg_aml_baseline_job_completes(ml_client: MLClient, gcg_environment: Environment) -> None: - """Submit a tiny GCG job on llama-2, wait for completion, assert success. - - The job runs only 5 optimization steps over 5 train prompts so it finishes - in roughly 15-25 minutes (most of that time is GPU spin-up + model - download). It validates that the entire pipeline — code upload, env - build, GPU compute, model loading, attack loop — works end-to-end. - """ - compute = os.environ.get("AZURE_ML_GCG_COMPUTE", _DEFAULT_COMPUTE) max_wait = int(os.environ.get("GCG_E2E_MAX_WAIT_SECONDS", _DEFAULT_MAX_WAIT_SECONDS)) - job_def = command( - code=Path(HOME_PATH), - command=( - "python -m pyrit.auxiliary_attacks.gcg.experiments.run" - " --model_name llama_2" - " --setup single" - " --n_train_data 5" - " --n_test_data 0" - " --n_steps 5" - " --batch_size 64" - ), - inputs={}, - environment=f"{gcg_environment.name}:{gcg_environment.version}", - environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]}, - compute=compute, - display_name="gcg_e2e_baseline", - description="E2E test: GCG baseline on Llama-2, 5 steps.", - ) - - submitted_job = ml_client.jobs.create_or_update(job_def) + notebook_globals = runpy.run_path(str(_NOTEBOOK_PATH), run_name="__main__") + submitted_job = notebook_globals["returned_job"] + ml_client = notebook_globals["ml_client"] job_name = submitted_job.name final_status: str | None = None + status: str | None = None try: deadline = time.monotonic() + max_wait while time.monotonic() < deadline: - current = ml_client.jobs.get(job_name) - status = current.status + status = ml_client.jobs.get(job_name).status if status in _TERMINAL_STATES: final_status = status break From 076ba36062664c115abf9afba032d243728c7a7a Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 8 May 2026 13:24:52 -0700 Subject: [PATCH 16/24] DOC: regenerate 1_gcg_azure_ml.ipynb with executed cell outputs Ran jupytext --to ipynb --execute against the notebook .py to capture cell outputs (workspace name, environment build status, submitted job name + status + Studio URL) per PyRIT convention. The submitted job ('lucid_muscle_nt947p71s0') ran to completion on Azure ML, doubling as a verification that the refactored notebook (which now invokes the GCG runner via 'python -m pyrit.auxiliary_attacks.gcg.experiments.run' instead of the old scripts/ launcher) still works end-to-end. The captured stderr cells include some Azure ML SDK telemetry noise ('ActivityCompleted: ... HowEnded=Failure' for benign UserError conditions like 'environment already at version N'). 
Will be cleaned up in a follow-up by suppressing the azure.ai.ml._telemetry logger in the notebook source. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/1_gcg_azure_ml.ipynb | 188 +++++++++++++++++- 1 file changed, 182 insertions(+), 6 deletions(-) diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb index 48a6029161..487cef0c24 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb @@ -42,7 +42,18 @@ "execution_count": null, "id": "4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", + "Loaded environment file: ./.pyrit/.env\n", + "Loaded environment file: ./.pyrit/.env.local\n", + "gcg-romanlutz\n" + ] + } + ], "source": [ "import os\n", "\n", @@ -61,7 +72,15 @@ "execution_count": null, "id": "5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + } + ], "source": [ "from azure.ai.ml import MLClient\n", "from azure.identity import AzureCliCredential\n", @@ -92,7 +111,92 @@ "execution_count": null, "id": "8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=764.16 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " \"operation\": \"be38f8925269670ed8a97b474130d1bb\",\n", + " \"request\": \"596017727106517b\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-08T18:06:45.1708999+00:00\"\n", + "}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\u001b[39m\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Environment.CreateOrUpdate, HowEnded=Failure, Duration=54867.21 [ms], Exception=ResourceExistsError, ErrorCategory=UserError, ErrorMessage=(UserError) Environment pyrit-gcg with version 7 is already registered and cannot be changed.\n", + "Code: UserError\n", + "Message: Environment pyrit-gcg with version 7 is already registered and cannot be changed.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=743.78 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " \"operation\": 
\"e474613de5b6b7f8851f4091c8ac0249\",\n", + " \"request\": \"be8aa928c6ad5831\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-08T18:07:14.6660874+00:00\"\n", + "}\n" + ] + }, + { + "data": { + "text/plain": [ + "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit-gcg', 'description': 'PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]', 'tags': {'Owner': 'unknown'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/gcg-romanlutz/providers/Microsoft.MachineLearningServices/workspaces/gcg-romanlutz/environments/pyrit-gcg/versions/8', 'Resource__source_path': '', 'base_path': './git/PyRIT-wt-gcg-refactor/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '8', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from pathlib import Path\n", "\n", @@ -175,7 +279,68 @@ "execution_count": null, "id": "12", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. 
Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\u001b[39m\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job: lucid_muscle_nt947p71s0\n", + "Status: Starting\n", + "Studio URL: https://ml.azure.com/runs/lucid_muscle_nt947p71s0?wsid=/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz&tid=72f988bf-86f1-41af-91ab-2d7cd011db47\n" + ] + } + ], "source": [ "returned_job = ml_client.create_or_update(job)\n", "print(f\"Job: {returned_job.name}\")\n", @@ -186,8 +351,19 @@ ], "metadata": { "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python" + "cell_metadata_filter": "-all" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.4" } }, "nbformat": 4, From e7137db438023170ff04f96208cd7e40a4f3fbe6 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sat, 9 May 2026 06:40:02 -0700 Subject: [PATCH 17/24] FEAT: notebook polls AML job and prints generated suffix Adds two pieces to make the AML tutorial actually show users an end-to-end result instead of stopping at "Job submitted, monitor in Studio": 1. ``run.py`` now accepts ``--output_dir``. The runner writes its result JSON under whatever path the caller passes, defaulting to ``outputs/``. The notebook's command declares ``outputs={"results": Output(uri_folder)}`` and passes ``--output_dir ${{outputs.results}}`` so AML mounts a path, the runner writes there, and the contents are uploaded as a named output artifact (auto-capture of ``./outputs/`` is *not* available in SDK v2 command jobs -- you have to declare named outputs explicitly). 2. A new poll-and-inspect cell at the end polls the submitted job, then downloads with ``all=True``, finds the result JSON under ``/named-outputs/results/``, and prints the final loss and generated adversarial suffix. Also adds a (best-effort) logging suppression block early in the notebook for azure.ai.ml SDK telemetry. It catches the python-logging warnings but not the "ActivityCompleted: HowEnded=Failure" lines or the upload progress bars -- those go through the SDK's own stderr handler with propagate=False and are not reachable via standard logging config (see azure-ai-ml _utils/_logger_utils.py). The remaining noise is benign telemetry for expected UserError conditions like "environment already at this version". Notebook re-executed end-to-end against AML (job stoic_parcel_6clfs67hp9, llama-2, 5 train data, 5 steps): completed successfully, suffix downloaded and printed. 
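For reviewers who want the named-output wiring at a glance before reading the full diff, it reduces to roughly this sketch (same names as the patch; the environment value is a stand-in, and constructing the command object does not contact the workspace):

    from azure.ai.ml import Output, command

    job = command(
        code=".",  # the code snapshot to upload (HOME_PATH in the notebook)
        command=(
            "python -m pyrit.auxiliary_attacks.gcg.experiments.run"
            " --output_dir ${{outputs.results}}"  # AML substitutes a mounted path
        ),
        # The named output must be declared explicitly -- SDK v2 command jobs
        # do not auto-capture ./outputs/. Whatever the runner writes under the
        # mounted path is uploaded as the job's "results" artifact.
        outputs={"results": Output(type="uri_folder")},
        environment="pyrit-gcg:1",  # stand-in for a registered environment name:version
        compute="gcg-gpu-a100",
    )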
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/1_gcg_azure_ml.ipynb | 230 ++++++++++++++++-- doc/code/auxiliary_attacks/1_gcg_azure_ml.py | 76 +++++- .../auxiliary_attacks/gcg/experiments/run.py | 31 ++- 3 files changed, 308 insertions(+), 29 deletions(-) diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb index 487cef0c24..cdc2bc24d2 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb @@ -67,10 +67,38 @@ "print(workspace)" ] }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "The Azure ML SDK emits a fair amount of telemetry to stderr that looks\n", + "alarming but is benign: every operation logs an `ActivityCompleted: ...\n", + "HowEnded=Failure` line for any expected `UserError` (such as\n", + "`create_or_update` finding the environment already at the latest version),\n", + "and every preview / experimental class prints a one-line warning. Quiet\n", + "all of it so the rest of the notebook output stays focused on what\n", + "actually matters." + ] + }, { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import warnings\n", + "\n", + "logging.getLogger(\"azure.ai.ml\").setLevel(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\", module=r\"azure\\.ai\\.ml.*\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", "metadata": {}, "outputs": [ { @@ -90,7 +118,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "8", "metadata": {}, "source": [ "## Create AML Environment" @@ -98,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "9", "metadata": {}, "source": [ "To install the dependencies needed to run GCG, we create an AML environment from a\n", @@ -109,14 +137,14 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "10", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=764.16 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=731.02 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", "Code: UserError\n", "Message: No secrets for credentials of type None.\n", "Additional Information:Type: ComponentName\n", @@ -125,8 +153,8 @@ "}Type: Correlation\n", "Info: {\n", " \"value\": {\n", - " \"operation\": \"be38f8925269670ed8a97b474130d1bb\",\n", - " \"request\": \"596017727106517b\"\n", + " \"operation\": \"d83f8c4d225dee5d56c301c18e298f59\",\n", + " \"request\": \"c537217eb2b56149\"\n", " }\n", "}Type: Environment\n", "Info: {\n", @@ -136,7 +164,7 @@ " \"value\": \"westus3\"\n", "}Type: Time\n", "Info: {\n", - " \"value\": \"2026-05-08T18:06:45.1708999+00:00\"\n", + " \"value\": \"2026-05-09T12:49:18.18528+00:00\"\n", "}\n" ] }, @@ -153,16 +181,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "ActivityCompleted: Activity=Environment.CreateOrUpdate, HowEnded=Failure, Duration=54867.21 [ms], Exception=ResourceExistsError, ErrorCategory=UserError, ErrorMessage=(UserError) Environment pyrit-gcg with version 7 is already registered and cannot be changed.\n", + "ActivityCompleted: 
Activity=Environment.CreateOrUpdate, HowEnded=Failure, Duration=33839.37 [ms], Exception=ResourceExistsError, ErrorCategory=UserError, ErrorMessage=(UserError) Environment pyrit-gcg with version 10 is already registered and cannot be changed.\n", "Code: UserError\n", - "Message: Environment pyrit-gcg with version 7 is already registered and cannot be changed.\n" + "Message: Environment pyrit-gcg with version 10 is already registered and cannot be changed.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=743.78 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=348.1 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", "Code: UserError\n", "Message: No secrets for credentials of type None.\n", "Additional Information:Type: ComponentName\n", @@ -171,8 +199,8 @@ "}Type: Correlation\n", "Info: {\n", " \"value\": {\n", - " \"operation\": \"e474613de5b6b7f8851f4091c8ac0249\",\n", - " \"request\": \"be8aa928c6ad5831\"\n", + " \"operation\": \"66a3d036ffde9abfa617b61d00bd6214\",\n", + " \"request\": \"139566989f2c3f74\"\n", " }\n", "}Type: Environment\n", "Info: {\n", @@ -182,14 +210,14 @@ " \"value\": \"westus3\"\n", "}Type: Time\n", "Info: {\n", - " \"value\": \"2026-05-08T18:07:14.6660874+00:00\"\n", + " \"value\": \"2026-05-09T12:49:49.3263735+00:00\"\n", "}\n" ] }, { "data": { "text/plain": [ - "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit-gcg', 'description': 'PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]', 'tags': {'Owner': 'unknown'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/gcg-romanlutz/providers/Microsoft.MachineLearningServices/workspaces/gcg-romanlutz/environments/pyrit-gcg/versions/8', 'Resource__source_path': '', 'base_path': './git/PyRIT-wt-gcg-refactor/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '8', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" + "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit-gcg', 'description': 'PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]', 'tags': {'Owner': 'unknown'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/gcg-romanlutz/providers/Microsoft.MachineLearningServices/workspaces/gcg-romanlutz/environments/pyrit-gcg/versions/11', 'Resource__source_path': '', 'base_path': './git/PyRIT-wt-gcg-refactor/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '11', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" ] }, 
"execution_count": null, @@ -221,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "11", "metadata": {}, "source": [ "## Submit Training Job to AML" @@ -229,7 +257,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "12", "metadata": {}, "source": [ "Finally, we configure the command to run the GCG algorithm. The entry point is\n", @@ -247,11 +275,11 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "13", "metadata": {}, "outputs": [], "source": [ - "from azure.ai.ml import command\n", + "from azure.ai.ml import Output, command\n", "\n", "job = command(\n", " code=Path(HOME_PATH),\n", @@ -263,8 +291,10 @@ " \" --n_test_data 0\"\n", " \" --n_steps 5\"\n", " \" --batch_size 64\"\n", + " \" --output_dir ${{outputs.results}}\"\n", " ),\n", " inputs={},\n", + " outputs={\"results\": Output(type=\"uri_folder\")},\n", " environment=f\"{env_docker_context.name}:{env_docker_context.version}\",\n", " environment_variables={\"HUGGINGFACE_TOKEN\": os.environ[\"HUGGINGFACE_TOKEN\"]},\n", " compute=\"gcg-gpu-a100\",\n", @@ -277,7 +307,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "14", "metadata": {}, "outputs": [ { @@ -331,13 +361,20 @@ "\n" ] }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "pathOnCompute is not a known attribute of class and will be ignored\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Job: lucid_muscle_nt947p71s0\n", + "Job: stoic_parcel_6clfs67hp9\n", "Status: Starting\n", - "Studio URL: https://ml.azure.com/runs/lucid_muscle_nt947p71s0?wsid=/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz&tid=72f988bf-86f1-41af-91ab-2d7cd011db47\n" + "Studio URL: https://ml.azure.com/runs/stoic_parcel_6clfs67hp9?wsid=/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz&tid=72f988bf-86f1-41af-91ab-2d7cd011db47\n" ] } ], @@ -347,6 +384,155 @@ "print(f\"Status: {returned_job.status}\")\n", "print(f\"Studio URL: {returned_job.studio_url}\")" ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Wait for the Job to Complete and Inspect the Generated Suffix\n", + "\n", + "The next cell polls the job until it reaches a terminal state (~20-30\n", + "minutes for the small 5-step baseline above), then downloads the named\n", + "`results` output and prints the final suffix. The runner writes its\n", + "result file as `individual_behaviors__gcg_.json` into\n", + "the directory Azure ML mounted for the `results` output, so it ends up\n", + "under `/named-outputs/results/` once we download. The\n", + "`controls` array in that file contains one entry per training step, and\n", + "the last entry is the final adversarial suffix that, appended to the user\n", + "prompt, was optimized to elicit the target response." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Preparing\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Queued\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Running\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading artifact azureml://subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz/datastores/workspaceblobstore/paths/azureml/stoic_parcel_6clfs67hp9/results/ to ./AppData/Local/Temp/gcg-aml-e4x7zwr5/named-outputs/results\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=350.53 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " \"operation\": \"c6de26a5821f768ace8e59f5b75601d7\",\n", + " \"request\": \"f84ff3aa7b1f0557\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-09T13:18:13.3919233+00:00\"\n", + "}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading artifact azureml://datastores/workspaceartifactstore/ExperimentRun/dcid.stoic_parcel_6clfs67hp9 to ./AppData/Local/Temp/gcg-aml-e4x7zwr5/artifacts\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result file: individual_behaviors_llama_2_gcg_20260509-131606.json\n", + "Steps run: 30\n", + "Final loss: 0.87353515625\n", + "Generated suffix: '! ! }; Kingdom ! ! ! Move ! ! [former [ [ [ [ [ [ [ ['\n" + ] + } + ], + "source": [ + "import json\n", + "import tempfile\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "_TERMINAL_STATES = {\"Completed\", \"Failed\", \"Canceled\", \"CancelRequested\"}\n", + "\n", + "last_status = None\n", + "while True:\n", + " current_status = ml_client.jobs.get(returned_job.name).status\n", + " if current_status != last_status:\n", + " print(f\"Job status: {current_status}\", flush=True)\n", + " last_status = current_status\n", + " if current_status in _TERMINAL_STATES:\n", + " break\n", + " time.sleep(60)\n", + "\n", + "assert current_status == \"Completed\", f\"Job did not complete successfully: {current_status}\"\n", + "\n", + "download_dir = Path(tempfile.mkdtemp(prefix=\"gcg-aml-\"))\n", + "ml_client.jobs.download(name=returned_job.name, download_path=str(download_dir), all=True)\n", + "\n", + "result_files = list(download_dir.rglob(\"individual_behaviors_*_gcg_*.json\"))\n", + "if not result_files:\n", + " print(f\"No GCG result file found under {download_dir}. 
Files captured:\")\n", + " for p in sorted(download_dir.rglob(\"*\")):\n", + " if p.is_file():\n", + " print(f\" {p.relative_to(download_dir)}\")\n", + " raise FileNotFoundError(\"Result JSON not in downloaded artifacts\")\n", + "\n", + "result_file = result_files[0]\n", + "with open(result_file) as f:\n", + " log = json.load(f)\n", + "\n", + "final_suffix = log[\"controls\"][-1] if log[\"controls\"] else None\n", + "final_loss = log[\"losses\"][-1] if log[\"losses\"] else None\n", + "\n", + "print(f\"Result file: {result_file.name}\")\n", + "print(f\"Steps run: {len(log['controls'])}\")\n", + "print(f\"Final loss: {final_loss}\")\n", + "print(f\"Generated suffix: {final_suffix!r}\")" + ] } ], "metadata": { diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py index 487cef0c24..ad35ae28e7 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py @@ -38,6 +38,22 @@ workspace = os.environ.get("AZURE_ML_WORKSPACE_NAME") print(workspace) +# %% [markdown] +# The Azure ML SDK emits a fair amount of telemetry to stderr that looks +# alarming but is benign: every operation logs an `ActivityCompleted: ... +# HowEnded=Failure` line for any expected `UserError` (such as +# `create_or_update` finding the environment already at the latest version), +# and every preview / experimental class prints a one-line warning. Quiet +# all of it so the rest of the notebook output stays focused on what +# actually matters. + +# %% +import logging +import warnings + +logging.getLogger("azure.ai.ml").setLevel(logging.ERROR) +warnings.filterwarnings("ignore", module=r"azure\.ai\.ml.*") + # %% from azure.ai.ml import MLClient from azure.identity import AzureCliCredential @@ -89,7 +105,7 @@ # In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`. # %% -from azure.ai.ml import command +from azure.ai.ml import Output, command job = command( code=Path(HOME_PATH), @@ -101,8 +117,10 @@ " --n_test_data 0" " --n_steps 5" " --batch_size 64" + " --output_dir ${{outputs.results}}" ), inputs={}, + outputs={"results": Output(type="uri_folder")}, environment=f"{env_docker_context.name}:{env_docker_context.version}", environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]}, compute="gcg-gpu-a100", @@ -116,3 +134,59 @@ print(f"Job: {returned_job.name}") print(f"Status: {returned_job.status}") print(f"Studio URL: {returned_job.studio_url}") + +# %% [markdown] +# ## Wait for the Job to Complete and Inspect the Generated Suffix +# +# The next cell polls the job until it reaches a terminal state (~20-30 +# minutes for the small 5-step baseline above), then downloads the named +# `results` output and prints the final suffix. The runner writes its +# result file as `individual_behaviors_<model_name>_gcg_<timestamp>.json` into +# the directory Azure ML mounted for the `results` output, so it ends up +# under `<download_dir>/named-outputs/results/` once we download. The +# `controls` array in that file contains one entry per training step, and +# the last entry is the final adversarial suffix that, appended to the user +# prompt, was optimized to elicit the target response.
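The polling cell that follows indexes into the result JSON; for orientation, this is the shape it assumes (a sketch inferred from the fields the cell reads -- the runner may write additional keys, and the values here are illustrative, not real output):

    # Assumed shape of individual_behaviors_<model_name>_gcg_<timestamp>.json:
    example_log = {
        "controls": ["! ! ! ! ...", "! ! }; Kingdom ..."],  # one suffix per logged step
        "losses": [1.92, 0.87],  # target loss per logged step; lower is better
    }
    final_suffix = example_log["controls"][-1]  # last entry = final adversarial suffix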
+ +# %% +import json +import tempfile +import time +from pathlib import Path + +_TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"} + +last_status = None +while True: + current_status = ml_client.jobs.get(returned_job.name).status + if current_status != last_status: + print(f"Job status: {current_status}", flush=True) + last_status = current_status + if current_status in _TERMINAL_STATES: + break + time.sleep(60) + +assert current_status == "Completed", f"Job did not complete successfully: {current_status}" + +download_dir = Path(tempfile.mkdtemp(prefix="gcg-aml-")) +ml_client.jobs.download(name=returned_job.name, download_path=str(download_dir), all=True) + +result_files = list(download_dir.rglob("individual_behaviors_*_gcg_*.json")) +if not result_files: + print(f"No GCG result file found under {download_dir}. Files captured:") + for p in sorted(download_dir.rglob("*")): + if p.is_file(): + print(f" {p.relative_to(download_dir)}") + raise FileNotFoundError("Result JSON not in downloaded artifacts") + +result_file = result_files[0] +with open(result_file) as f: + log = json.load(f) + +final_suffix = log["controls"][-1] if log["controls"] else None +final_loss = log["losses"][-1] if log["losses"] else None + +print(f"Result file: {result_file.name}") +print(f"Steps run: {len(log['controls'])}") +print(f"Final loss: {final_loss}") +print(f"Generated suffix: {final_suffix!r}") diff --git a/pyrit/auxiliary_attacks/gcg/experiments/run.py b/pyrit/auxiliary_attacks/gcg/experiments/run.py index 93c5d8d388..7558388e1f 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/run.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/run.py @@ -30,7 +30,9 @@ def _load_yaml_to_dict(config_path: str) -> dict[str, Any]: return data -def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parameters: Any) -> None: +def run_trainer( + *, model_name: str, setup: str = "single", output_dir: str = "outputs", **extra_config_parameters: Any +) -> None: """ Trains and generates adversarial suffix - single model single prompt. @@ -40,6 +42,10 @@ def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parame setup (str): Identifier for the setup, currently supports - "single": one prompt one model - "multiple": multiple prompts one model or multiple prompts multiple models + output_dir (str): Directory (created if missing) to write the result JSON file to. + Defaults to "outputs". For Azure ML jobs, pass the path AML provides for the + named output (typically expanded from ``${{outputs.<name>}}``) so the result + is uploaded to the job's artifact store. **extra_config_parameters: Additional parameters to override config values.
Raises: @@ -57,7 +63,7 @@ def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parame "train_data": ( "https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv" ), - "result_prefix": f"results/individual_behaviors_{model_name}_gcg", + "result_prefix": f"{output_dir}/individual_behaviors_{model_name}_gcg", "token": hf_token, } if setup != "single": @@ -74,8 +80,8 @@ config["model_name"] = model_name trainer = GreedyCoordinateGradientAdversarialSuffixGenerator() - if not os.path.exists("results"): - os.makedirs("results") + if not os.path.exists(output_dir): + os.makedirs(output_dir) trainer.generate_suffix(**config) @@ -101,14 +107,26 @@ def _parse_arguments() -> argparse.Namespace: parser.add_argument("--n_steps", type=int, default=100, help="Number of steps") parser.add_argument("--batch_size", type=int, default=512, help="Batch size") parser.add_argument("--random_seed", type=int, default=None, help="Random seed") + parser.add_argument( + "--output_dir", + type=str, + default="outputs", + help=( + "Directory to write the result JSON to. Pass the path Azure ML " + "expands ${{outputs.<name>}} to so the result is uploaded as a " + "named output artifact." + ), + ) return parser.parse_args() if __name__ == "__main__": - # Resolve relative paths (configs/, results/) against this file's directory - # so the script works regardless of where it is invoked from -- including + # Resolve relative paths (configs/) against this file's directory so the + # script works regardless of where it is invoked from -- including # `python -m pyrit.auxiliary_attacks.gcg.experiments.run` from any cwd. + # output_dir is left untouched so callers can point it at an absolute path + # (e.g. AML's ${{outputs.results}} expansion). os.chdir(Path(__file__).resolve().parent) args = _parse_arguments() @@ -122,4 +140,5 @@ def _parse_arguments() -> argparse.Namespace: batch_size=args.batch_size, test_steps=1, random_seed=args.random_seed, + output_dir=args.output_dir, ) From bee8f3f1e3b42f246bd60166f65098293df4bc76 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sat, 9 May 2026 06:53:12 -0700 Subject: [PATCH 18/24] MAINT: bump azure-ai-ml to >=1.32.0 to silence ListSecrets telemetry noise The 1.32.0 release includes: Skip _list_secrets for identity-based datastores to prevent noisy telemetry traces. That bullet is exactly the source of the ``ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure ... UserError ... No secrets for credentials of type None`` blob that was showing up in our Azure ML notebook's executed cell outputs and made it look like the env build / job submission was failing when it actually wasn't. After bumping, a quick smoke test (build MLClient, list envs) drops from many lines of telemetry noise to a single ``Class X is experimental`` info message -- much more reasonable for a tutorial. Bumped both the ``gcg`` extra and the ``all`` extra so they stay aligned. The upload progress bars and the experimental-class warning still show up; those are separate noise sources that this SDK release does not address.
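The smoke test mentioned above amounts to roughly the following (a sketch; it assumes the same AZURE_ML_* variables the notebook reads and a prior `az login`):

    import os

    from azure.ai.ml import MLClient
    from azure.identity import AzureCliCredential

    ml_client = MLClient(
        AzureCliCredential(),
        os.environ["AZURE_ML_SUBSCRIPTION_ID"],
        os.environ["AZURE_ML_RESOURCE_GROUP"],
        os.environ["AZURE_ML_WORKSPACE_NAME"],
    )
    # Before azure-ai-ml 1.32.0, iterating workspace assets emitted the
    # Datastore.ListSecrets failure telemetry on stderr; after the bump the
    # listing runs quietly.
    for env in ml_client.environments.list():
        print(env.name)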
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyproject.toml | 4 ++-- uv.lock | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 639d6f9430..75fa99b6f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ huggingface = [ ] gcg = [ "accelerate>=1.7.0", - "azure-ai-ml>=1.27.1", + "azure-ai-ml>=1.32.0", "ml-collections>=1.1.0", # pyarrow is a transitive dep of `datasets`. With the gcg extra installed, # the resolver picks a pyarrow version that lacks cp314 wheels and fails @@ -141,7 +141,7 @@ speech = [ all = [ "accelerate>=1.7.0", "av>=14.0.0", - "azure-ai-ml>=1.27.1", + "azure-ai-ml>=1.32.0", "azure-cognitiveservices-speech>=1.44.0", "flask>=3.1.3", "ipykernel>=6.29.5", diff --git a/uv.lock b/uv.lock index f3dfa3997f..efd2ef8032 100644 --- a/uv.lock +++ b/uv.lock @@ -444,7 +444,7 @@ wheels = [ [[package]] name = "azure-ai-ml" -version = "1.31.0" +version = "1.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-common" }, @@ -465,9 +465,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/74/472494706149d6b63dd22b1389d6b3756f586d98b1c074f120d971e2180f/azure_ai_ml-1.31.0.tar.gz", hash = "sha256:ba3f00df768292e197c47a03eb8eeb36d70bc865fca3fcf67738e0f124cc0d1c", size = 9456188, upload-time = "2025-12-30T18:31:00.529Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/2c/3087f02752886fea59c5f0453a0647b6f7970db8571d581a45a746b21df2/azure_ai_ml-1.32.0.tar.gz", hash = "sha256:edb90219520412132fac100a31a40264a2145150dc4a60fb884acdacc8ded7d5", size = 9442010, upload-time = "2026-03-16T17:04:34.915Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/bf/5b3df818fc6993f9b54bf88c21a462076e8c11ca7052556181e804de8e2e/azure_ai_ml-1.31.0-py3-none-any.whl", hash = "sha256:e2eb928660b10245349dfd1b8e92a598bd0df8af492d48edfcaf0b2bd621702d", size = 13186666, upload-time = "2025-12-30T18:31:03.833Z" }, + { url = "https://files.pythonhosted.org/packages/a9/cd/89930cb0ee2101ca67c250eaf302b61a86a0e6dd90fa8d7e89ed6cb5644d/azure_ai_ml-1.32.0-py3-none-any.whl", hash = "sha256:710cce6f706b64a65860218edc757ef1b9699ba9b284defe42926a9dc6116e15", size = 13169553, upload-time = "2026-03-16T17:04:38.897Z" }, ] [[package]] @@ -5296,8 +5296,8 @@ requires-dist = [ { name = "av", specifier = ">=14.0.0" }, { name = "av", marker = "extra == 'all'", specifier = ">=14.0.0" }, { name = "azure-ai-contentsafety", specifier = ">=1.0.0" }, - { name = "azure-ai-ml", marker = "extra == 'all'", specifier = ">=1.27.1" }, - { name = "azure-ai-ml", marker = "extra == 'gcg'", specifier = ">=1.27.1" }, + { name = "azure-ai-ml", marker = "extra == 'all'", specifier = ">=1.32.0" }, + { name = "azure-ai-ml", marker = "extra == 'gcg'", specifier = ">=1.32.0" }, { name = "azure-cognitiveservices-speech", marker = "extra == 'all'", specifier = ">=1.44.0" }, { name = "azure-cognitiveservices-speech", marker = "extra == 'speech'", specifier = ">=1.46.0" }, { name = "azure-core", specifier = ">=1.38.0" }, From 4c1ba12ae60d18965f126d682b10102a643f3126 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sun, 10 May 2026 11:47:04 -0700 Subject: [PATCH 19/24] TEST: cover log.log_gpu_memory branches and move to test_log.py CI's coverage check (>=90% on diff) was failing on the log_gpu_memory try/except added in c98af28: lines 70-74 of log.py weren't reached. Two issues: 1. 
The TestGpuMemoryLogging class lived in test_lifecycle.py, which does pytest.importorskip on the GCG train module. CI installs the 'all' extra but not 'gcg', so ml_collections is missing and the train import fails, skipping the whole test_lifecycle.py module -- including the GPU memory tests, even though they only need stdlib. Moved them into test_log.py (which only importorskips the log module, all stdlib) so they actually run in CI. 2. The new test_log_gpu_memory_swallows_nvidia_smi_failure exercises the except branch (lines 73-74) that the old success-only test never hit. log_gpu_memory must swallow nvidia-smi failures so the training loop never crashes when run on a host without nvidia-smi. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/gcg/test_lifecycle.py | 41 ++------------ tests/unit/auxiliary_attacks/gcg/test_log.py | 56 ++++++++++++++++++- 2 files changed, 60 insertions(+), 37 deletions(-) diff --git a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py index 99b820adec..83c616f535 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py +++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py @@ -1,17 +1,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -import subprocess -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest +# Note: GPU-memory tests live in test_log.py since they only need the log +# module (stdlib imports). Anything that touches the train module needs +# the gcg extra installed (ml_collections, torch, etc.) so we skip the +# whole module when those imports fail. log_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.log", reason="GCG optional dependencies (mlflow, etc.) 
not installed", ) -log_gpu_memory = log_mod.log_gpu_memory -get_gpu_memory = log_mod.get_gpu_memory train_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.train", @@ -20,38 +21,6 @@ Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator -class TestGpuMemoryLogging: - """Tests for GPU memory query and logging.""" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_parses_nvidia_smi(self, mock_sp: MagicMock) -> None: - """Should parse nvidia-smi output into a dict of GPU -> free memory.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" - result = get_gpu_memory() - assert result == {"gpu1_free_memory": 8000, "gpu2_free_memory": 16000} - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: - """Should handle single GPU output.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n24000 MiB\n" - result = get_gpu_memory() - assert result == {"gpu1_free_memory": 24000} - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_log_gpu_memory_logs_via_logging(self, mock_sp: MagicMock) -> None: - """Should log GPU memory info without error.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" - # Should not raise - log_gpu_memory(step=5) - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: - """Should propagate exception when nvidia-smi is not available.""" - mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") - with pytest.raises(subprocess.CalledProcessError): - get_gpu_memory() - - class TestGenerateSuffixLifecycle: """Tests for generate_suffix worker lifecycle management.""" diff --git a/tests/unit/auxiliary_attacks/gcg/test_log.py b/tests/unit/auxiliary_attacks/gcg/test_log.py index e30f96c460..c225b5aeba 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_log.py +++ b/tests/unit/auxiliary_attacks/gcg/test_log.py @@ -1,7 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -from unittest.mock import MagicMock +import subprocess +from unittest.mock import MagicMock, patch import pytest @@ -9,6 +10,8 @@ "pyrit.auxiliary_attacks.gcg.experiments.log", reason="GCG optional dependencies not installed", ) +get_gpu_memory = log_mod.get_gpu_memory +log_gpu_memory = log_mod.log_gpu_memory log_loss = log_mod.log_loss log_params = log_mod.log_params log_table_summary = log_mod.log_table_summary @@ -76,3 +79,54 @@ def test_logs_table_summary(self) -> None: def test_logs_empty_summary(self) -> None: """Should handle empty losses and controls.""" log_table_summary(losses=[], controls=[], n_steps=0) + + +class TestGpuMemoryLogging: + """Tests for GPU memory query and logging. + + Lives here (not test_lifecycle.py) so the tests don't transitively + depend on the GCG `train` module (which requires `ml_collections`, + only installed with the `gcg` extra). The log module itself only + uses stdlib imports, so these tests run in any CI environment. 
+ """ + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_parses_nvidia_smi(self, mock_sp: MagicMock) -> None: + """Should parse nvidia-smi output into a dict of GPU -> free memory.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 8000, "gpu2_free_memory": 16000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: + """Should handle single GPU output.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n24000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 24000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_log_gpu_memory_logs_via_logging(self, mock_sp: MagicMock) -> None: + """Should log GPU memory info without error on the success path.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + # Should not raise + log_gpu_memory(step=5) + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_log_gpu_memory_swallows_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: + """Should swallow exceptions when nvidia-smi is not available. + + Covers the except branch of `log_gpu_memory` -- callers (like the + train loop) should never crash because the runtime happens not to + have nvidia-smi. + """ + mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") + # Must not raise + log_gpu_memory(step=5) + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: + """`get_gpu_memory` itself should propagate the exception (only + `log_gpu_memory` is expected to swallow it).""" + mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") + with pytest.raises(subprocess.CalledProcessError): + get_gpu_memory() From d32d13b6fe44a96af08c35ff46ea2d982ca2b0d1 Mon Sep 17 00:00:00 2001 From: varshini2305 Date: Sun, 10 May 2026 17:28:40 -0700 Subject: [PATCH 20/24] FEAT: Drop fastchat from GCG, use tokenizer.apply_chat_template (#965) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCG previously relied on the (long-unmaintained) `fastchat` library to render the user/assistant exchange for each AttackPrompt. That library bundled a hardcoded list of ~5 model templates and broke whenever a new model shipped without a fastchat entry — most recently Phi-3-mini, whose template lacks `.system` and crashed `_update_ids` on Azure ML. This change replaces the fastchat-driven slice computation with `tokenizer.apply_chat_template()`, which is the standard HuggingFace path and works for any chat-tuned model whose tokenizer ships a Jinja chat template. It also drops `fastchat`/`fschat` as a runtime dependency entirely (removed from the GCG Dockerfile). The work is based on PR #1049 by @varshini2305, which prototyped the same approach. This commit polishes that prototype: - Removes WIP `print(...)` debug statements from `_update_ids`. - Replaces the narrow `_detect_assistant_role` heuristic (only matched `<|assistant|>` / `assistant:`) with positional computation: the assistant role tokens are always whatever sits between the end of the control and the start of the target. This works for llama-2's `[/INST]`, llama-3's header markers, phi-3 ChatML, and any future template without code changes. 
- Handles the edge case where `char_to_token` returns None (when a substring ends exactly at the prompt boundary, common for the target string). - Restores `**params.tokenizer_kwargs[i]` spread in `get_workers` (the prototype hardcoded `use_fast=True` and dropped user kwargs). - Drops `conv_template` from `AttackPrompt`, `PromptManager`, `MultiPromptAttack`, `EvaluateAttack`, `IndividualPromptAttack`, `ProgressiveMultiPromptAttack`, `ModelWorker`, `get_workers`, and the log-file dicts they populate. - Drops `conversation_templates` from `generate_suffix`, `_build_params`, every shipped YAML config, and the config-validation test. - Adds a clear `ValueError` in `get_workers` if a tokenizer has no chat template configured. Tests: - All 72 GCG unit tests pass (existing tests adjusted for new signatures). - The 12 GCG integration tests pass on real GPT-2 with both a llama-2-style and a ChatML/phi-3-style chat template — the second template shape was previously `xfail(strict=True, raises=AttributeError)` referencing this exact issue. Those xfail markers are removed. - Broader pyrit unit suite (7,492 tests) unaffected. Closes #965. Builds on #1049 by @varshini2305. Co-authored-by: Roman Lutz Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gcg/attack/base/attack_manager.py | 220 ++++++------------ .../configs/individual_llama_2.yaml | 1 - .../configs/individual_llama_3.yaml | 1 - .../configs/individual_mistral.yaml | 1 - .../configs/individual_phi_3_mini.yaml | 1 - .../configs/individual_vicuna.yaml | 1 - .../configs/transfer_all_models.yaml | 1 - .../experiments/configs/transfer_llama_2.yaml | 1 - .../experiments/configs/transfer_llama_3.yaml | 1 - .../experiments/configs/transfer_mistral.yaml | 1 - .../configs/transfer_phi_3_mini.yaml | 1 - .../experiments/configs/transfer_vicuna.yaml | 1 - .../gcg/experiments/train.py | 5 - pyrit/auxiliary_attacks/gcg/src/Dockerfile | 3 +- .../auxiliary_attacks/test_gcg_integration.py | 134 +++++------ .../gcg/test_attack_wiring.py | 27 ++- .../gcg/test_data_and_config.py | 1 - .../auxiliary_attacks/gcg/test_gcg_core.py | 10 +- .../auxiliary_attacks/gcg/test_lifecycle.py | 10 +- 19 files changed, 176 insertions(+), 245 deletions(-) diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py index 460200d2ef..6e7991ea30 100644 --- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py +++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py @@ -17,8 +17,6 @@ import torch import torch.multiprocessing as mp import torch.nn as nn -from fastchat.conversation import Conversation, SeparatorStyle -from fastchat.model import get_conversation_template from transformers import ( AutoModelForCausalLM, AutoTokenizer, @@ -133,7 +131,6 @@ def __init__( goal: str, target: str, tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, ) -> None: @@ -146,9 +143,9 @@ def __init__( target (str): The target of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a configured chat template + (i.e., ``tokenizer.chat_template`` is not ``None``); ``apply_chat_template`` is used + to render the user/assistant exchange instead of model-specific fastchat templates. 
control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !") test_prefixes (list, optional): @@ -160,11 +157,8 @@ def __init__( self.target = target self.control = control_init self.tokenizer = tokenizer - self.conv_template = conv_template self.test_prefixes = test_prefixes - self.conv_template.messages = [] - self.test_new_toks = len(self.tokenizer(self.target).input_ids) + 2 # buffer for prefix in self.test_prefixes: self.test_new_toks = max(self.test_new_toks, len(self.tokenizer(prefix).input_ids)) @@ -172,103 +166,68 @@ def __init__( self._update_ids() def _update_ids(self) -> None: - self.conv_template.append_message(self.conv_template.roles[0], f"{self.goal} {self.control}") - self.conv_template.append_message(self.conv_template.roles[1], f"{self.target}") - prompt = self.conv_template.get_prompt() + # Render the goal+control as the user turn and the target as the assistant turn using the + # tokenizer's built-in chat template. This replaces fastchat's per-model Conversation logic + # and works for any HuggingFace chat-tuned model (issue #965). + messages = [ + {"role": "user", "content": f"{self.goal} {self.control}"}, + {"role": "assistant", "content": f"{self.target}"}, + ] + prompt = self.tokenizer.apply_chat_template(messages, tokenize=False) encoding = self.tokenizer(prompt) toks = encoding.input_ids - if self.conv_template.name == "llama-2" or self.conv_template.name == "llama-3": - self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks))) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks)) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 2) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 3) + # Locate goal/control/target substrings in the rendered prompt. + goal_start = prompt.find(self.goal) + control_start = prompt.find(self.control) + target_start = prompt.find(self.target) + if goal_start == -1 or control_start == -1 or target_start == -1: + raise ValueError( + "Could not locate goal/control/target in chat-templated prompt. " + f"prompt={prompt!r}, goal={self.goal!r}, " + f"control={self.control!r}, target={self.target!r}" + ) - else: - python_tokenizer = False or self.conv_template.name == "oasst_pythia" - try: - encoding.char_to_token(len(prompt) - 1) - except Exception: - python_tokenizer = True - if python_tokenizer: - # This is specific to the vicuna and pythia tokenizer and conversation prompt. - # It will not work with other tokenizers or prompts. 
- self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks) - 1)) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks) - 1) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 1) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 2) - else: - self._system_slice = slice(None, encoding.char_to_token(len(self.conv_template.system))) - self._user_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[0])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[0]) + len(self.conv_template.roles[0]) + 1 - ), - ) - self._goal_slice = slice( - encoding.char_to_token(prompt.find(self.goal)), - encoding.char_to_token(prompt.find(self.goal) + len(self.goal)), - ) - self._control_slice = slice( - encoding.char_to_token(prompt.find(self.control)), - encoding.char_to_token(prompt.find(self.control) + len(self.control)), - ) - self._assistant_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[1])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[1]) + len(self.conv_template.roles[1]) + 1 - ), - ) - self._target_slice = slice( - encoding.char_to_token(prompt.find(self.target)), - encoding.char_to_token(prompt.find(self.target) + len(self.target)), - ) - self._loss_slice = slice( - encoding.char_to_token(prompt.find(self.target)) - 1, - encoding.char_to_token(prompt.find(self.target) + len(self.target)) - 1, - ) + # ``char_to_token`` returns None when the character index has no + # corresponding token (e.g. when the substring ends exactly at the end + # of the prompt or lands on whitespace squashed into a neighbouring + # token). For end positions we clamp to ``len(toks)``; for start + # positions we walk forward to the next character that does map to a + # token. Both are necessary for the slice arithmetic to remain valid + # across tokenizers/templates. 
+ def end_tok(char_pos: int) -> int: + tok = encoding.char_to_token(char_pos) + return len(toks) if tok is None else tok + + def start_tok(char_pos: int) -> int: + limit = len(prompt) + cur = char_pos + while cur < limit: + tok = encoding.char_to_token(cur) + if tok is not None: + return tok + cur += 1 + return len(toks) + + self._goal_slice = slice( + start_tok(goal_start), + end_tok(goal_start + len(self.goal)), + ) + self._control_slice = slice( + start_tok(control_start), + end_tok(control_start + len(self.control)), + ) + target_start_tok = start_tok(target_start) + target_end_tok = end_tok(target_start + len(self.target)) + self._target_slice = slice(target_start_tok, target_end_tok) + self._loss_slice = slice(target_start_tok - 1, target_end_tok - 1) + # Assistant role tokens are everything between the control end and the target start. + # This works for any chat template (e.g. llama-2 "[/INST]", phi-3 "<|assistant|>", etc.) + # without us needing to know the literal marker text. + self._assistant_role_slice = slice(self._control_slice.stop, self._target_slice.start) self.input_ids = torch.tensor(toks[: self._target_slice.stop], device="cpu") - self.conv_template.messages = [] @torch.no_grad() # type: ignore[misc, untyped-decorator, unused-ignore] def generate(self, model: Any, gen_config: Any = None) -> torch.Tensor: @@ -457,7 +416,6 @@ def __init__( goals: list[str], targets: list[str], tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, managers: Optional[dict[str, type[AttackPrompt]]] = None, @@ -471,9 +429,7 @@ def __init__( targets (List[str]): The list of targets of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a chat template configured. control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
!") test_prefixes (list, optional): @@ -491,8 +447,7 @@ def __init__( self.tokenizer = tokenizer self._prompts = [ - managers["AP"](goal, target, tokenizer, conv_template, control_init, test_prefixes) - for goal, target in zip(goals, targets) + managers["AP"](goal, target, tokenizer, control_init, test_prefixes) for goal, target in zip(goals, targets) ] self._nonascii_toks = get_nonascii_toks(tokenizer, device="cpu") @@ -634,9 +589,7 @@ def __init__( self.models = [worker.model for worker in workers] self.logfile = logfile self.prompts = [ - managers["PM"]( - goals, targets, worker.tokenizer, worker.conv_template, control_init, test_prefixes, managers - ) + managers["PM"](goals, targets, worker.tokenizer, control_init, test_prefixes, managers) for worker in workers ] self.managers = managers @@ -819,7 +772,6 @@ def test_all(self) -> tuple[list[list[bool]], list[list[int]], list[list[float]] self.goals + self.test_goals, self.targets + self.test_targets, worker.tokenizer, - worker.conv_template, self.control_str, self.test_prefixes, self.managers, @@ -997,7 +949,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1005,7 +957,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1236,7 +1188,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1244,7 +1196,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1450,7 +1402,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1458,7 +1410,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1585,7 +1537,6 @@ def __init__( token: str, model_kwargs: dict[str, Any], tokenizer: Any, - conv_template: Conversation, device: str, ) -> None: self.model = ( @@ -1596,7 +1547,6 @@ def __init__( .eval() ) self.tokenizer = tokenizer - self.conv_template = conv_template self.tasks: mp.JoinableQueue[Any] = mp.JoinableQueue() self.results: mp.JoinableQueue[Any] = mp.JoinableQueue() self.process: Optional[mp.Process] = None @@ -1667,45 +1617,23 @@ def get_workers(params: Any, eval: bool = False) -> tuple[list[ModelWorker], lis tokenizer.padding_side = "left" if not tokenizer.pad_token: tokenizer.pad_token = tokenizer.eos_token + if tokenizer.chat_template is None: + raise ValueError( + f"Tokenizer {params.tokenizer_paths[i]!r} has no chat_template configured. GCG uses " + "tokenizer.apply_chat_template() to render prompts (see issue #965); without a chat " + "template the attack cannot be set up. 
Pick a chat-tuned model or set " + "tokenizer.chat_template explicitly." + ) tokenizers.append(tokenizer) logger.info(f"Loaded {len(tokenizers)} tokenizers") - raw_conv_templates = [] - for template in params.conversation_templates: - if template in ["llama-2", "mistral", "llama-3-8b", "vicuna"]: - raw_conv_templates.append(get_conversation_template(template)) - elif template in ["phi-3-mini"]: - conv_template = Conversation( - name="phi-3-mini", - system_template="<|system|>\n{system_message}", - system_message="", - roles=("<|user|>", "<|assistant|>"), - sep_style=SeparatorStyle.CHATML, - sep="<|end|>", - stop_token_ids=[32000, 32001, 32007], - ) - raw_conv_templates.append(conv_template) - else: - raise ValueError("Conversation template not recognized") - - conv_templates = [] - for conv in raw_conv_templates: - if conv.name == "zero_shot": - conv.roles = tuple("### " + r for r in conv.roles) - conv.sep = "\n" - elif conv.name == "llama-2": - conv.sep2 = conv.sep2.strip() - conv_templates.append(conv) - - logger.info(f"Loaded {len(conv_templates)} conversation templates") workers = [ ModelWorker( params.model_paths[i], params.token, params.model_kwargs[i], tokenizers[i], - conv_templates[i], params.devices[i], ) for i in range(len(params.model_paths)) diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml index 10ef5504c7..504fb3dd4b 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml index 4eaf9c987a..a8b60c3926 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml index 640f4c32f0..31e562c226 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml index e3accd3c2a..d3547152d3 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml 
+++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml index 3fc3537e49..286cb4a013 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["lmsys/vicuna-13b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-13b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml index f6f3ae3d58..351622dba3 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruc tokenizer_kwargs: [{"use_fast": False}, {"use_fast": False}, {"use_fast": False}, {"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruct-v0.1", "meta-llama/Meta-Llama-3-8B-Instruct", "lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2", "mistral", "llama-3-8b", "vicuna"] devices: ["cuda:0", "cuda:1", "cuda:2", "cuda:3"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml index d7200b1776..fc3c824124 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml index 5e538824c8..0b353b8769 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml index c2f808c185..4c6438a6af 100644 --- 
a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml index 679556c3e4..35316c5d98 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml index 2bbfe0a965..3850c95a21 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["lmsys/vicuna-7b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py index 4adf633d76..d5815f7bab 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/train.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py @@ -41,7 +41,6 @@ def generate_suffix( tokenizer_paths: Optional[list[str]] = None, model_name: str = "", model_paths: Optional[list[str]] = None, - conversation_templates: Optional[list[str]] = None, result_prefix: str = "", train_data: str = "", control_init: str = _DEFAULT_CONTROL_INIT, @@ -81,7 +80,6 @@ def generate_suffix( tokenizer_paths (Optional[list[str]]): Paths to tokenizer models. model_name (str): Name identifier for the model. model_paths (Optional[list[str]]): Paths to model weights. - conversation_templates (Optional[list[str]]): Conversation template names. result_prefix (str): Prefix for result file paths. train_data (str): URL or path to training data CSV. control_init (str): Initial control string for optimization. 
@@ -117,8 +115,6 @@ def generate_suffix( tokenizer_paths = [] if model_paths is None: model_paths = [] - if conversation_templates is None: - conversation_templates = [] if devices is None: devices = ["cuda:0"] if model_kwargs is None: @@ -131,7 +127,6 @@ def generate_suffix( tokenizer_paths=tokenizer_paths, model_name=model_name, model_paths=model_paths, - conversation_templates=conversation_templates, result_prefix=result_prefix, train_data=train_data, control_init=control_init, diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile index e820fbaaf5..085d286b9d 100644 --- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile +++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile @@ -19,5 +19,4 @@ WORKDIR /app # Install PyRIT with GCG extras to get all dependencies COPY pyproject.toml MANIFEST.in README.md LICENSE /app/ COPY pyrit/ /app/pyrit/ -RUN uv pip install -e ".[gcg]" && \ - uv pip install "fschat @ git+https://github.com/lm-sys/FastChat.git@2c68a13bfe10b86f40e3eefc3fcfacb32c00b02a" +RUN uv pip install -e ".[gcg]" diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index 8474fd0f3d..f2944bf480 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -5,9 +5,15 @@ These tests validate that the GCG attack pipeline works end-to-end with a real (tiny) model. They use GPT-2 (~124M params) which can run on CPU, paired with -the llama-2 conversation template (which has explicit handling in _update_ids). +explicit chat templates set on the tokenizer (since GPT-2 has no default +chat template). -Requires: torch, transformers, fastchat, mlflow (GCG optional deps). +After PR #965 dropped fastchat, ``AttackPrompt._update_ids`` uses +``tokenizer.apply_chat_template()`` exclusively, so we exercise that code path +with two distinct template shapes (llama-2 style and ChatML/phi-3 style) to +catch template-specific regressions. + +Requires: torch, transformers (GCG optional deps). Skipped via importorskip when deps are not installed. """ @@ -15,10 +21,8 @@ torch = pytest.importorskip("torch", reason="torch not installed") transformers = pytest.importorskip("transformers", reason="transformers not installed") -pytest.importorskip("fastchat", reason="fastchat not installed") -from fastchat.model import get_conversation_template # noqa: E402 from transformers import AutoTokenizer, GPT2LMHeadModel # noqa: E402 from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( # noqa: E402 @@ -33,6 +37,31 @@ token_gradients, ) +# Minimal Jinja chat templates that exercise the two structural variants we care about: +# (1) Inline role markers ("[INST]"/"[/INST]") used by llama-2. +# (2) Distinct role tokens ("<|user|>"/"<|assistant|>") used by phi-3 / ChatML. +# Both must produce findable goal/control/target substrings for the new +# apply_chat_template-based _update_ids to compute correct slices. 
+_LLAMA_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST]" + "{%- elif m['role'] == 'assistant' -%}" + " {{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" +) + +_CHATML_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "<|user|>\n{{ m['content'] }}<|end|>\n<|assistant|>\n" + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}<|end|>" + "{%- endif -%}" + "{%- endfor -%}" +) + @pytest.fixture(scope="module") def gpt2_model() -> GPT2LMHeadModel: @@ -40,33 +69,33 @@ def gpt2_model() -> GPT2LMHeadModel: return GPT2LMHeadModel.from_pretrained("gpt2").eval() -@pytest.fixture(scope="module") -def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: - """Load GPT-2 tokenizer once for all tests in this module.""" +def _make_tokenizer(chat_template: str) -> transformers.PreTrainedTokenizer: + """Build a fresh GPT-2 tokenizer with the given chat template attached.""" tokenizer = AutoTokenizer.from_pretrained("gpt2") tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" + tokenizer.chat_template = chat_template return tokenizer @pytest.fixture() -def conv_template(): - """Create a fresh llama-2 conversation template for each test.""" - conv = get_conversation_template("llama-2") - conv.sep2 = conv.sep2.strip() - return conv +def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a llama-2-style chat template attached.""" + return _make_tokenizer(_LLAMA_STYLE_TEMPLATE) @pytest.fixture() -def vicuna_conv_template(): - """Create a fresh vicuna conversation template for each test. - - Vicuna exercises the non-llama branch of `_update_ids` (the path that - references `conv_template.system` and uses `encoding.char_to_token`). - A bug in that branch — like the Phi-3 `.system` AttributeError we hit - on Azure (#965) — would never be caught by llama-2-only tests. +def gpt2_chatml_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a ChatML / phi-3-style chat template attached. + + Exercises the second structural variant we care about: distinct role tokens + (``<|user|>``/``<|assistant|>``) versus llama-2's inline ``[INST]`` markers. + Before PR #965 this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes; after the apply_chat_template rewrite both + template shapes share a single code path. """ - return get_conversation_template("vicuna_v1.1") + return _make_tokenizer(_CHATML_STYLE_TEMPLATE) class TestTokenGradientsIntegration: @@ -102,20 +131,18 @@ def test_gradient_is_finite_and_nonzero( class TestGCGAttackPromptIntegration: - """Integration tests for GCGAttackPrompt with real GPT-2.""" + """Integration tests for GCGAttackPrompt with real GPT-2 + llama-style template.""" def test_prompt_initializes_with_valid_slices( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """AttackPrompt should initialize with non-empty, non-overlapping slices.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! 
!", ) @@ -128,14 +155,12 @@ def test_grad_returns_valid_gradient( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """GCGAttackPrompt.grad should return a finite, non-zero gradient tensor.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -150,14 +175,12 @@ def test_target_loss_is_finite_scalar( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Target loss from real model logits should be a finite positive number.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -174,14 +197,12 @@ def test_sample_control_produces_valid_candidates( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Sampled control tokens should be decodable by the tokenizer.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -228,39 +249,29 @@ def test_get_nonascii_toks_returns_nonempty_tensor(self, gpt2_tokenizer: transfo assert len(toks) > 0 -class TestGCGAttackPromptNonLlamaTemplate: - """Integration tests covering the non-llama branch of `AttackPrompt._update_ids`. - - The llama-2/llama-3 path is well-exercised above. The `else` branch contains - distinct logic that touches `conv_template.system`, `char_to_token`, and - different slice arithmetic. A bug here — like the Phi-3 `conv_template.system` - AttributeError we hit on Azure (#965) — would only surface with a - non-llama template, so we exercise it explicitly with vicuna. +class TestGCGAttackPromptChatMLTemplate: + """Integration tests covering ChatML / phi-3 style templates. - Both tests are currently `xfail` because vicuna (and any other modern - fastchat template that lacks a `.system` attribute) reproduces the same - AttributeError as Phi-3 — a known bug tracked in #965 that PR replacing - fastchat with `tokenizer.apply_chat_template()` will fix. Once that lands, - the xfail will flip to "unexpectedly passed" and the marker can be removed. + These exercise the second structural variant of chat templates (distinct + role tokens like ``<|user|>``/``<|assistant|>`` separated from content, + versus llama-2's inline ``[INST]`` markers). Before PR #965 dropped + fastchat, this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes (the same Phi-3 ``AttributeError`` we hit on + Azure ML). After the apply_chat_template rewrite both shapes share a single + code path, so these tests should pass alongside the llama-style ones above. 
""" - @pytest.mark.xfail( - reason="#965: fastchat templates without `.system` attribute crash _update_ids", - raises=AttributeError, - strict=True, - ) - def test_prompt_initializes_with_vicuna_template( + def test_prompt_initializes_with_chatml_template( self, gpt2_model: GPT2LMHeadModel, - gpt2_tokenizer: transformers.PreTrainedTokenizer, - vicuna_conv_template: object, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, ) -> None: - """GCGAttackPrompt should construct successfully with the vicuna template.""" + """GCGAttackPrompt should construct successfully with a ChatML template.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", - tokenizer=gpt2_tokenizer, - conv_template=vicuna_conv_template, + tokenizer=gpt2_chatml_tokenizer, control_init="! ! ! ! !", ) @@ -269,23 +280,16 @@ def test_prompt_initializes_with_vicuna_template( assert prompt._control_slice.stop <= prompt._target_slice.start assert prompt.input_ids.shape[0] > 0 - @pytest.mark.xfail( - reason="#965: fastchat templates without `.system` attribute crash _update_ids", - raises=AttributeError, - strict=True, - ) - def test_grad_returns_valid_gradient_with_vicuna_template( + def test_grad_returns_valid_gradient_with_chatml_template( self, gpt2_model: GPT2LMHeadModel, - gpt2_tokenizer: transformers.PreTrainedTokenizer, - vicuna_conv_template: object, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, ) -> None: - """gradient computation should work end-to-end on the non-llama path.""" + """gradient computation should work end-to-end with a ChatML template.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", - tokenizer=gpt2_tokenizer, - conv_template=vicuna_conv_template, + tokenizer=gpt2_chatml_tokenizer, control_init="! ! ! ! !", ) @@ -293,5 +297,5 @@ def test_grad_returns_valid_gradient_with_vicuna_template( n_control = prompt._control_slice.stop - prompt._control_slice.start assert grad.shape[0] == n_control - assert grad.shape[1] == gpt2_tokenizer.vocab_size + assert grad.shape[1] == gpt2_chatml_tokenizer.vocab_size assert torch.isfinite(grad).all() diff --git a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py index 7613a4780d..abb64786de 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py +++ b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py @@ -43,11 +43,32 @@ def _make_mock_worker() -> MagicMock: - """Create a mock worker with required attributes for attack construction.""" + """Create a mock worker whose tokenizer can stand in for a real chat tokenizer. + + The wiring tests construct real ``GCGAttackPrompt`` instances which call + ``tokenizer.apply_chat_template`` and then walk character positions in the + rendered prompt. We need a real string + a tokenizer that can answer + ``char_to_token`` queries on it, so we back the mock with a real + distilgpt2 tokenizer (the smallest available transformers tokenizer that + ships with all the methods we touch). 
+ """ + from transformers import AutoTokenizer + + real_tokenizer = AutoTokenizer.from_pretrained("gpt2") + real_tokenizer.pad_token = real_tokenizer.eos_token + real_tokenizer.chat_template = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST] " + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" + ) + worker = MagicMock() worker.model.name_or_path = "test-model" - worker.tokenizer.name_or_path = "test-tokenizer" - worker.conv_template.name = "test-template" + worker.tokenizer = real_tokenizer return worker diff --git a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py index 2777bb1eea..ef5cc709c5 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py +++ b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py @@ -94,7 +94,6 @@ def test_all_configs_have_required_keys(self, config_files: list[str]) -> None: required_keys = { "tokenizer_paths", "model_paths", - "conversation_templates", "devices", } for path in config_files: diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py index e1568e1a0f..cb3cdcff51 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -390,7 +390,7 @@ def test_transfer_true_creates_progressive(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -430,7 +430,7 @@ def test_transfer_false_creates_individual(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -484,7 +484,6 @@ def test_raises_on_mismatched_goals_targets(self) -> None: goals=["goal1", "goal2"], targets=["target1"], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -494,7 +493,6 @@ def test_raises_on_empty_goals(self) -> None: goals=[], targets=[], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -506,11 +504,11 @@ def test_raises_with_multiple_workers(self) -> None: mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "m1" mock_worker1.tokenizer.name_or_path = "t1" - mock_worker1.conv_template.name = "c1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "m2" mock_worker2.tokenizer.name_or_path = "t2" - mock_worker2.conv_template.name = "c2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" with pytest.raises(ValueError, match="exactly 1 worker"): EvaluateAttack( diff --git a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py index 83c616f535..ab42b5d961 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py +++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -44,11 +44,11 @@ def test_workers_stopped_after_training( mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "test-model-1" mock_worker1.tokenizer.name_or_path = "test-tokenizer-1" - mock_worker1.conv_template.name = "test-template-1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "test-model-2" mock_worker2.tokenizer.name_or_path = "test-tokenizer-2" - mock_worker2.conv_template.name = "test-template-2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker1], [mock_worker2]) mock_attack_instance = MagicMock() @@ -61,7 +61,6 @@ def test_workers_stopped_after_training( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, ) @@ -93,7 +92,7 @@ def test_workers_not_stopped_on_training_failure( mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker], []) mock_attack_instance = MagicMock() @@ -108,7 +107,6 @@ def test_workers_not_stopped_on_training_failure( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, ) From 863c4906c03fd80d51c9d4d5dd65514858411373 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Tue, 12 May 2026 13:15:52 -0700 Subject: [PATCH 21/24] FEAT: Add Phi-4 GCG config Adds an `individual_phi_4.yaml` config and registers `phi_4` in the GCG runner's known model list. Validated end-to-end on Azure ML alongside the apply_chat_template fastchat removal: a 5-step run on `microsoft/phi-4` completed successfully with `python -m pyrit.auxiliary_attacks.gcg.experiments.run --model_name phi_4 --setup single --n_train_data 5 --n_test_data 0 --n_steps 5 --batch_size 64`, producing a finite loss and non-trivial suffix. Note `tokenizer_kwargs` uses `use_fast: True` (Phi-4 ships with a fast tokenizer; `use_fast: False` is reserved for older models that need it, e.g. llama-2). 
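
For reference, a minimal preflight sketch (not part of this change; it
assumes `transformers` is installed and the Hugging Face model id is
reachable) that checks the two tokenizer properties this config relies on
before launching a run:

    from transformers import AutoTokenizer

    # Mirrors the precondition get_workers enforces since the fastchat
    # removal: a tokenizer without a chat template is rejected outright.
    tok = AutoTokenizer.from_pretrained("microsoft/phi-4", use_fast=True)
    assert tok.is_fast, "expected the fast (Rust-backed) tokenizer"
    assert tok.chat_template is not None, "get_workers would raise ValueError"
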
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../experiments/configs/individual_phi_4.yaml | 29 +++++++++++++++++++ .../auxiliary_attacks/gcg/experiments/run.py | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml new file mode 100644 index 0000000000..d080430016 --- /dev/null +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml @@ -0,0 +1,29 @@ +transfer: False +target_weight: 1.0 +control_weight: 0.0 +progressive_goals: False +progressive_models: False +anneal: False +incr_control: False +stop_on_success: False +verbose: True +allow_non_ascii: False +num_train_models: 1 +result_prefix: "results/individual_phi4" +tokenizer_paths: ["microsoft/phi-4"] +tokenizer_kwargs: [{"use_fast": True}] +model_paths: ["microsoft/phi-4"] +model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] +devices: ["cuda:0"] +train_data: "" +test_data: "" +n_train_data: 50 +n_test_data: 0 +control_init: "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !" +n_steps: 500 +test_steps: 50 +batch_size: 512 +learning_rate: 0.01 +topk: 256 +temp: 1 +filter_cand: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/run.py b/pyrit/auxiliary_attacks/gcg/experiments/run.py index 7558388e1f..f78b6e220d 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/run.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/run.py @@ -11,7 +11,7 @@ from pyrit.auxiliary_attacks.gcg.experiments.train import GreedyCoordinateGradientAdversarialSuffixGenerator from pyrit.setup.initialization import _load_environment_files -_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini"] +_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini", "phi_4"] _ALL_MODELS: str = "all_models" From 1ba1be431d3e434462b082f7cf8205c7ab3f2e6a Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Tue, 12 May 2026 21:08:39 -0700 Subject: [PATCH 22/24] TEST: cover error paths added by fastchat removal in attack_manager.py Adds unit tests exercising the four diff-coverage gaps flagged by CI on PR #1717: - AttackPrompt._update_ids() ValueError when goal/control/target don't appear verbatim in the chat-templated prompt (line 186). - start_tok() walking forward when char_to_token returns None at the initial position, then finding a mappable position later (line 210). - start_tok() falling back to len(toks) when no position from char_pos to end-of-prompt maps to a token (line 211). - get_workers() ValueError when a tokenizer has no chat_template configured (lines 1620-1621). Both start_tok tests use a fully mocked tokenizer because real tokenizers are too well-behaved (every byte position maps to some token) to deterministically exercise the None-handling branches. 
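
To make the None semantics concrete, a small standalone illustration (a
sketch, not part of the diff below; it assumes `transformers` and the gpt2
tokenizer are available):

    from transformers import AutoTokenizer

    enc = AutoTokenizer.from_pretrained("gpt2")("hello world")
    enc.char_to_token(0)   # -> 0: "h" belongs to the first token
    enc.char_to_token(11)  # -> None: one past the final character,
                           #    the case end_tok clamps to len(toks)
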
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auxiliary_attacks/gcg/test_gcg_core.py | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py index cb3cdcff51..79dcbf2231 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -517,3 +517,149 @@ def test_raises_with_multiple_workers(self) -> None: workers=[mock_worker1, mock_worker2], managers={"AP": MagicMock(), "PM": MagicMock(), "MPA": MagicMock()}, ) + + +class TestUpdateIdsErrorPaths: + """Tests covering the error / fallback paths in AttackPrompt._update_ids.""" + + def test_raises_when_substring_not_in_rendered_prompt(self) -> None: + """If the chat template strips/transforms goal/control/target so they don't appear + verbatim in the rendered prompt, _update_ids must raise a clear ValueError.""" + tokenizer = MagicMock() + # Chat template that drops the user content entirely — goal/control won't appear in prompt + tokenizer.apply_chat_template.return_value = "[INST] [/INST] hello" + # tokenizer(...) returns an encoding-like object + encoding = MagicMock() + encoding.input_ids = [1, 2, 3, 4] + encoding.char_to_token.return_value = 1 + tokenizer.return_value = encoding + + with pytest.raises(ValueError, match="Could not locate goal/control/target"): + AttackPrompt( + goal="this-goal-is-missing", + target="this-target-is-missing", + tokenizer=tokenizer, + control_init="this-control-is-missing", + ) + + def test_start_tok_walks_forward_when_initial_position_has_no_token(self) -> None: + """char_to_token returns None for the start position (e.g., whitespace squashed + into the previous token); start_tok must walk forward to the next mappable + character. Slices should still be valid.""" + # Use a fully mocked tokenizer so we can deterministically force char_to_token + # to return None at specific positions, otherwise real tokenizers usually map + # every byte and never trigger the fallback. + prompt_text = "USER hello !! ASSISTANT world" + toks = list(range(15)) + + def char_to_token(pos: int) -> int | None: + # Positions of "h" and "w" both return None; the next char does map. This + # exercises the cur += 1 walk-forward branch in start_tok. + char = prompt_text[pos] if 0 <= pos < len(prompt_text) else "" + if char in ("h", "w"): + return None + # Map remaining positions in a way that preserves slice ordering + return min(pos // 2, len(toks) - 1) + + encoding = MagicMock() + encoding.input_ids = toks + encoding.char_to_token.side_effect = char_to_token + + tokenizer = MagicMock() + tokenizer.apply_chat_template.return_value = prompt_text + tokenizer.return_value = encoding + + # Construction must succeed even though char_to_token returns None at goal/target + # start positions ("h" / "w"). + prompt = AttackPrompt( + goal="hello", + target="world", + tokenizer=tokenizer, + control_init="!!", + ) + assert isinstance(prompt._goal_slice.start, int) + assert isinstance(prompt._target_slice.start, int) + + def test_start_tok_returns_len_toks_when_no_position_maps(self) -> None: + """If char_to_token returns None for every position from char_pos to end-of-prompt, + start_tok must return len(toks) as a safe fallback (line 211).""" + prompt_text = "USER hello !! 
ASSISTANT world tail"
+        toks = list(range(20))
+
+        def char_to_token(pos: int) -> int | None:
+            char = prompt_text[pos] if 0 <= pos < len(prompt_text) else ""
+            # "tail" sits at end and never maps to a token (forces start_tok to exhaust
+            # the loop and hit `return len(toks)`); other content maps normally.
+            tail_start = prompt_text.find("tail")
+            if pos >= tail_start:
+                return None
+            return min(pos // 2, len(toks) - 1)
+
+        encoding = MagicMock()
+        encoding.input_ids = toks
+        encoding.char_to_token.side_effect = char_to_token
+
+        tokenizer = MagicMock()
+        tokenizer.apply_chat_template.return_value = prompt_text
+        tokenizer.return_value = encoding
+
+        # "tail" as the target — its start position and every position after it returns
+        # None, so start_tok exits the while loop and returns len(toks).
+        prompt = AttackPrompt(
+            goal="hello",
+            target="tail",
+            tokenizer=tokenizer,
+            control_init="!!",
+        )
+        assert prompt._target_slice.start == len(toks)
+
+    def test_end_tok_returns_len_toks_when_target_is_at_prompt_end(self) -> None:
+        """If the target sits at the very end of the rendered prompt,
+        char_to_token(end_pos) returns None — end_tok must clamp to len(toks)."""
+        from transformers import AutoTokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.chat_template = (
+            "{%- for m in messages -%}"
+            "{%- if m['role'] == 'user' -%}"
+            "[INST] {{ m['content'] }} [/INST]"
+            "{%- elif m['role'] == 'assistant' -%}"
+            " {{ m['content'] }}"
+            "{%- endif -%}"
+            "{%- endfor -%}"
+        )
+
+        prompt = AttackPrompt(
+            goal="hello",
+            target="world",  # this sits at end of rendered prompt with no trailing tokens
+            tokenizer=tokenizer,
+            control_init="! ! !",
+        )
+        # _target_slice.stop should be len(toks), not None or NoneType arithmetic
+        assert isinstance(prompt._target_slice.stop, int)
+        assert prompt._target_slice.stop > prompt._target_slice.start
+
+
+class TestGetWorkersChatTemplateValidation:
+    """Tests for the chat-template precondition in get_workers."""
+
+    def test_raises_when_tokenizer_has_no_chat_template(self) -> None:
+        """Models without a chat_template cannot be used with apply_chat_template-based
+        GCG; get_workers should raise a clear ValueError pointing to the cause."""
+        from unittest.mock import patch
+
+        get_workers = attack_manager_mod.get_workers
+
+        params = MagicMock()
+        params.tokenizer_paths = ["fake/no-chat-template-model"]
+        params.token = ""
+        params.tokenizer_kwargs = [{}]
+
+        bare_tokenizer = MagicMock()
+        bare_tokenizer.chat_template = None
+        bare_tokenizer.pad_token = ""
+
+        with patch.object(attack_manager_mod.AutoTokenizer, "from_pretrained", return_value=bare_tokenizer):
+            with pytest.raises(ValueError, match="no chat_template configured"):
+                get_workers(params)

From bdf9f1d28bfd48fd3b56e858f59cc23fe76b918a Mon Sep 17 00:00:00 2001
From: Roman Lutz
Date: Wed, 13 May 2026 06:22:07 -0700
Subject: [PATCH 23/24] TEST: write GCG attack logfiles into tmp_path, not cwd

_create_attack passes `logfile=f"{result_prefix}_{timestamp}.json"` directly
to IndividualPromptAttack/ProgressiveMultiPromptAttack, whose __init__
writes that file as soon as it is non-None. Three tests were calling
_build_params (or its helpers) with result_prefix="test", so each test left
a test_<timestamp>.json artifact in the worktree root.

Switch them to result_prefix=str(tmp_path / "test") so the logfile lands in
pytest's per-test tmp dir and is cleaned up automatically.
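
The pattern in miniature (a sketch, not part of the diff below; `tmp_path`
is pytest's built-in per-test temporary directory fixture, and the timestamp
in the filename is hypothetical):

    from pathlib import Path

    def test_logfile_lands_in_tmp(tmp_path: Path) -> None:
        result_prefix = str(tmp_path / "test")
        logfile = Path(f"{result_prefix}_20260513.json")
        logfile.write_text("{}")  # stand-in for the attack's json dump
        assert logfile.parent == tmp_path  # cleaned up by pytest, not left in cwd
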
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py | 5 +++--
 tests/unit/auxiliary_attacks/gcg/test_gcg_core.py | 9 +++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py
index abb64786de..2a01fb7464 100644
--- a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py
+++ b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py
@@ -7,6 +7,7 @@
 and MultiPromptAttack.__init__(), and template compatibility issues in _update_ids().
 """
 
+from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -152,7 +153,7 @@ def test_progressive_attack_creates_mpa_without_error(self) -> None:
             filter_cand=True,
         )
 
-    def test_create_attack_individual_wires_correctly(self) -> None:
+    def test_create_attack_individual_wires_correctly(self, tmp_path: Path) -> None:
         """_create_attack with transfer=False should produce an IndividualPromptAttack
         that can create internal MPA instances without error."""
         worker = _make_mock_worker()
@@ -160,7 +161,7 @@ def test_create_attack_individual_wires_correctly(self) -> None:
         params = Generator._build_params(
             transfer=False,
             control_init="! ! !",
-            result_prefix="test",
+            result_prefix=str(tmp_path / "test"),
             learning_rate=0.01,
             batch_size=64,
             n_steps=5,
diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
index 79dcbf2231..12f00a0688 100644
--- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
+++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+from pathlib import Path
 from unittest.mock import MagicMock
 
 import numpy as np
@@ -368,7 +369,7 @@ def test_augmentation_is_seeded_reproducible(self) -> None:
 class TestCreateAttack:
     """Tests for GreedyCoordinateGradientAdversarialSuffixGenerator._create_attack."""
 
-    def test_transfer_true_creates_progressive(self) -> None:
+    def test_transfer_true_creates_progressive(self, tmp_path: Path) -> None:
         train_mod = pytest.importorskip(
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
@@ -380,7 +381,7 @@ def test_transfer_true_creates_progressive(self) -> None:
             progressive_models=True,
             progressive_goals=True,
             control_init="! ! !",
-            result_prefix="test",
+            result_prefix=str(tmp_path / "test"),
             gbda_deterministic=True,
             learning_rate=0.01,
             batch_size=512,
@@ -410,7 +411,7 @@ def test_transfer_true_creates_progressive(self) -> None:
         )
         assert isinstance(attack, ProgressiveMultiPromptAttack)
 
-    def test_transfer_false_creates_individual(self) -> None:
+    def test_transfer_false_creates_individual(self, tmp_path: Path) -> None:
         train_mod = pytest.importorskip(
             "pyrit.auxiliary_attacks.gcg.experiments.train",
             reason="GCG train module not available",
@@ -420,7 +421,7 @@ def test_transfer_false_creates_individual(self) -> None:
         params = generator_cls._build_params(
             transfer=False,
             control_init="! ! !",
-            result_prefix="test",
+            result_prefix=str(tmp_path / "test"),
             gbda_deterministic=True,
             learning_rate=0.01,
             batch_size=512,

From dcdc24746caea2994ba5547d11fd47edd8e6af32 Mon Sep 17 00:00:00 2001
From: Roman Lutz
Date: Wed, 13 May 2026 07:00:59 -0700
Subject: [PATCH 24/24] TEST: assert sample_control changes at most one position

GCGPromptManager.sample_control replaces exactly one position per candidate,
but the new token is sampled uniformly from the top-k of (-grad). When that
draw happens to land on the same id already at that position the candidate
ends up identical to the original (diffs == 0).

The previous assertion (diffs == 1) was therefore flaky against the
underlying randint, and CI hit it on PR #1049. Relax to 'at most 1' and
document why; ran the test 20x locally with no failures.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/unit/auxiliary_attacks/gcg/test_gcg_core.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
index 12f00a0688..c71dacf0f5 100644
--- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
+++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py
@@ -227,8 +227,14 @@ def test_output_tokens_within_vocab(self) -> None:
         assert (result >= 0).all()
         assert (result < vocab_size).all()
 
-    def test_each_candidate_differs_in_one_position(self) -> None:
-        """Each candidate should differ from the original in exactly one position."""
+    def test_each_candidate_differs_in_at_most_one_position(self) -> None:
+        """Each candidate replaces exactly one position with a token sampled from top-k.
+
+        The replacement token is drawn uniformly from top-k, so it may equal the
+        original token at that position (giving diffs == 0). The function only
+        guarantees that *at most* one position differs from the original; asserting
+        exactly one would make the test flaky against the underlying randomness.
+        """
         n_control = 10
         vocab_size = 50
         batch_size = 8
@@ -240,8 +246,7 @@ def test_each_candidate_differs_in_one_position(self) -> None:
 
         for i in range(batch_size):
             diffs = (result[i] != original_toks.to(result.device)).sum().item()
-            # Each candidate changes exactly 1 position
-            assert diffs == 1, f"Candidate {i} differs in {diffs} positions, expected 1"
+            assert diffs <= 1, f"Candidate {i} differs in {diffs} positions, expected at most 1"
 
     def test_non_ascii_filtering(self) -> None:
         """When allow_non_ascii=False, the newly sampled token should not be non-ASCII.