oliverdk committed on Dec 17, 2024

Commit

ba25241

verified ·

1 Parent(s): b06adb9

End of training

Browse files

Files changed (22) hide show

.hydra/config.yaml +17 -0
.hydra/hydra.yaml +182 -0
.hydra/overrides.yaml +1 -0
README.md +57 -180
added_tokens.json +40 -0
config.json +2 -1
configuration_measurement_pred.py +2 -3
logs/events.out.tfevents.1734449259.gail.ist.berkeley.edu.2164200.0 +3 -0
logs/events.out.tfevents.1734449259.gail.ist.berkeley.edu.2164202.0 +3 -0
merges.txt +0 -0
model.safetensors +1 -1
modeling_measurement_pred.py +14 -12
sensor_loc_finder.py +17 -0
sensor_loc_reg.py +10 -0
sensor_loc_stories.py +46 -0
sensor_locs_from_token.py +16 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer_config.json +327 -0
train.log +2 -0
training_args.bin +3 -0
vocab.json +0 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,17 @@

+model:
+  dataset_name: redwoodresearch/diamonds-seed1
+  model_type: codegen
+  pretrained_model_name: Salesforce/codegen-350M-mono
+  max_length: 1024
+hparams:
+  learning_rate: 2.0e-05
+  weight_decay: 0.02
+  lr_scheduler_type: cosine
+  warmup_steps: 64
+  effective_batch_size: 32
+  num_train_epochs: 5
+per_device_train_batch_size: 4
+per_device_eval_batch_size: 4
+fp16: true
+dataset_len: null
+push_to_hub: true

.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,182 @@

+hydra:
+  run:
+    dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    submitit_folder: ${hydra.sweep.dir}/.submitit/%j
+    timeout_min: 1440
+    cpus_per_task: null
+    gpus_per_node: null
+    tasks_per_node: 1
+    mem_gb: 16
+    nodes: 1
+    name: ${hydra.job.name}
+    stderr_to_stdout: false
+    _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
+    partition: null
+    qos: high
+    comment: null
+    constraint: null
+    exclude: ddpg.ist.berkeley.edu,dqn.ist.berkeley.edu
+    gres: gpu:A6000:1
+    cpus_per_gpu: null
+    gpus_per_task: null
+    mem_per_gpu: null
+    mem_per_cpu: null
+    account: null
+    signal_delay_s: 120
+    max_num_timeout: 0
+    additional_parameters: {}
+    array_parallelism: 256
+    setup: null
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: MULTIRUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=MULTIRUN
+    task:
+    - model.dataset_name=redwoodresearch/diamonds-seed1
+  job:
+    name: train
+    chdir: null
+    override_dirname: model.dataset_name=redwoodresearch/diamonds-seed1
+    id: '747438'
+    num: 0
+    config_name: codegen_diamonds_slurm
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.1'
+    cwd: /nas/ucb/oliveradk/measurement-pred
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /nas/ucb/oliveradk/measurement-pred/conf
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-17/07-26-22/0
+    choices:
+      hparams: hparams
+      model: codegen_diamonds
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: slurm_chai
+      hydra/output: default
+  verbose: false

.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ - model.dataset_name=redwoodresearch/diamonds-seed1

README.md CHANGED Viewed

@@ -1,199 +1,76 @@
 ---
-library_name: transformers
-tags: []
 ---
-# Model Card for Model ID
-<!-- Provide a quick summary of what the model is/does. -->
-## Model Details
-### Model Description
-<!-- Provide a longer summary of what this model is. -->
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
-- **Developed by:** [More Information Needed]
-- **Funded by [optional]:** [More Information Needed]
-- **Shared by [optional]:** [More Information Needed]
-- **Model type:** [More Information Needed]
-- **Language(s) (NLP):** [More Information Needed]
-- **License:** [More Information Needed]
-- **Finetuned from model [optional]:** [More Information Needed]
-### Model Sources [optional]
-<!-- Provide the basic links for the model. -->
-- **Repository:** [More Information Needed]
-- **Paper [optional]:** [More Information Needed]
-- **Demo [optional]:** [More Information Needed]
-## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-[More Information Needed]
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-[More Information Needed]
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-### Recommendations
-<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
-Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
-## How to Get Started with the Model
-Use the code below to get started with the model.
-[More Information Needed]
-## Training Details
-### Training Data
-<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-[More Information Needed]
-### Training Procedure
-<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
-#### Preprocessing [optional]
-[More Information Needed]
-#### Training Hyperparameters
-- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
-#### Speeds, Sizes, Times [optional]
-<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
-[More Information Needed]
-## Evaluation
-<!-- This section describes the evaluation protocols and provides the results. -->
-### Testing Data, Factors & Metrics
-#### Testing Data
-<!-- This should link to a Dataset Card if possible. -->
-[More Information Needed]
-#### Factors
-<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
-[More Information Needed]
-#### Metrics
-<!-- These are the evaluation metrics being used, ideally with a description of why. -->
-[More Information Needed]
-### Results
-[More Information Needed]
-#### Summary
-## Model Examination [optional]
-<!-- Relevant interpretability work for the model goes here -->
-[More Information Needed]
-## Environmental Impact
-<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
-Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
-- **Hardware Type:** [More Information Needed]
-- **Hours used:** [More Information Needed]
-- **Cloud Provider:** [More Information Needed]
-- **Compute Region:** [More Information Needed]
-- **Carbon Emitted:** [More Information Needed]
-## Technical Specifications [optional]
-### Model Architecture and Objective
-[More Information Needed]
-### Compute Infrastructure
-[More Information Needed]
-#### Hardware
-[More Information Needed]
-#### Software
-[More Information Needed]
-## Citation [optional]
-<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
-**BibTeX:**
-[More Information Needed]
-**APA:**
-[More Information Needed]
-## Glossary [optional]
-<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
-[More Information Needed]
-## More Information [optional]
-[More Information Needed]
-## Model Card Authors [optional]
-[More Information Needed]
-## Model Card Contact
-[More Information Needed]

 ---
+license: bsd-3-clause
+base_model: Salesforce/codegen-350M-mono
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: codegen-350M-mono-measurement_pred-diamonds-seed2
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# codegen-350M-mono-measurement_pred-diamonds-seed2
+This model is a fine-tuned version of [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.4023
+- Accuracy: 0.9108
+- Accuracy Sensor 0: 0.9220
+- Auroc Sensor 0: 0.9580
+- Accuracy Sensor 1: 0.9109
+- Auroc Sensor 1: 0.9645
+- Accuracy Sensor 2: 0.9260
+- Auroc Sensor 2: 0.9611
+- Accuracy Aggregated: 0.8845
+- Auroc Aggregated: 0.9532
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 64
+- num_epochs: 5
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy | Accuracy Sensor 0 | Auroc Sensor 0 | Accuracy Sensor 1 | Auroc Sensor 1 | Accuracy Sensor 2 | Auroc Sensor 2 | Accuracy Aggregated | Auroc Aggregated |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-------------------:|:----------------:|
+| 0.3009        | 0.9997 | 781  | 0.4552          | 0.8074   | 0.8220            | 0.9041         | 0.8092            | 0.9255         | 0.8372            | 0.9304         | 0.7610              | 0.9026           |
+| 0.1989        | 1.9994 | 1562 | 0.3633          | 0.8595   | 0.8835            | 0.9425         | 0.8544            | 0.9520         | 0.8757            | 0.9517         | 0.8244              | 0.9351           |
+| 0.1335        | 2.9990 | 2343 | 0.3032          | 0.8924   | 0.8985            | 0.9529         | 0.8877            | 0.9608         | 0.9246            | 0.9573         | 0.8588              | 0.9463           |
+| 0.093         | 4.0    | 3125 | 0.3016          | 0.9138   | 0.9203            | 0.9581         | 0.9131            | 0.9651         | 0.9304            | 0.9609         | 0.8914              | 0.9529           |
+| 0.0432        | 4.9984 | 3905 | 0.4023          | 0.9108   | 0.9220            | 0.9580         | 0.9109            | 0.9645         | 0.9260            | 0.9611         | 0.8845              | 0.9532           |
+### Framework versions
+- Transformers 4.41.0
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

added_tokens.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257
+}

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "multirun/2024-12-16/09-40-07/0/checkpoint-3905",
   "activation_function": "gelu_new",
   "aggregate_weight": 0.3,
   "architectures": [
@@ -28,6 +28,7 @@
   "resid_pdrop": 0.0,
   "rotary_dim": 32,
   "scale_attn_weights": true,
   "sensor_token": " omit",
   "sensor_token_id": 42848,
   "sensors_weight": 0.7,

 {
+  "_name_or_path": "Salesforce/codegen-350M-mono",
   "activation_function": "gelu_new",
   "aggregate_weight": 0.3,
   "architectures": [
   "resid_pdrop": 0.0,
   "rotary_dim": 32,
   "scale_attn_weights": true,
+  "sensor_loc_type": "locs_from_token",
   "sensor_token": " omit",
   "sensor_token_id": 42848,
   "sensors_weight": 0.7,

configuration_measurement_pred.py CHANGED Viewed

@@ -1,12 +1,11 @@
 from abc import abstractmethod
 from transformers import PretrainedConfig
 class MeasurementPredictorConfig(PretrainedConfig):
     def __init__(
         self,
         sensor_token=" omit",
-        sensor_token_id=None, # 35991
         n_sensors=3,
         use_aggregated=True,
         sensors_weight = 0.7,
@@ -14,7 +13,7 @@ class MeasurementPredictorConfig(PretrainedConfig):
         **kwargs
     ):
         self.sensor_token = sensor_token
-        self.sensor_token_id = sensor_token_id
         self.n_sensors = n_sensors
         self.use_aggregated = use_aggregated
         self.sensors_weight = sensors_weight

 from abc import abstractmethod
 from transformers import PretrainedConfig
 class MeasurementPredictorConfig(PretrainedConfig):
     def __init__(
         self,
         sensor_token=" omit",
+        sensor_loc_type="locs_from_token",
         n_sensors=3,
         use_aggregated=True,
         sensors_weight = 0.7,
         **kwargs
     ):
         self.sensor_token = sensor_token
+        self.sensor_loc_type = sensor_loc_type
         self.n_sensors = n_sensors
         self.use_aggregated = use_aggregated
         self.sensors_weight = sensors_weight

logs/events.out.tfevents.1734449259.gail.ist.berkeley.edu.2164200.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2e41afc7f407fc05f1fa23c0ac11383aa446decd69608ac285b2bde2241d367
+size 16069

logs/events.out.tfevents.1734449259.gail.ist.berkeley.edu.2164202.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:faefbd84b6d34b212da301a506667018a7d1ae306ac727bb6e7dea778bc6f2df
+size 14912

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0dff3d164bec1dccddb85a3b0c4e393ffb9b57001d335231add2b7374e4520b
 size 1216963976

 version https://git-lfs.github.com/spec/v1
+oid sha256:57651a18b0a367e6c0ffaa97666d103bcf42708a6d8abab94d3cf7704204ad7d
 size 1216963976

modeling_measurement_pred.py CHANGED Viewed

@@ -3,14 +3,19 @@ from typing import Optional, Tuple, Union
 import torch
 from torch.nn import BCEWithLogitsLoss
 from transformers import PreTrainedModel, PreTrainedTokenizer
 from transformers.modeling_outputs import BaseModelOutputWithPast, SequenceClassifierOutputWithPast
 class MeasurementPredictorMixin(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.sensor_token = config.sensor_token
-        self.sensor_token_id = config.sensor_token_id
         self.n_sensors = config.n_sensors
         self.sensor_probes = torch.nn.ModuleList([
             torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
@@ -20,15 +25,13 @@ class MeasurementPredictorMixin(PreTrainedModel):
             self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
         self.sensors_weight = config.sensors_weight
         self.aggregate_weight = config.aggregate_weight
-    def check_tokenizer(self, tokenizer: PreTrainedTokenizer):
-        sensor_token_id = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(self.sensor_token))[0]
-        assert sensor_token_id == self.sensor_token_id
-    def set_sensor_token(self, sensor_token: str, tokenizer: PreTrainedTokenizer):
-        sensor_token_id = tokenizer.tokenize(sensor_token)[0]
-        self.sensor_token = sensor_token
-        self.sensor_token_id = sensor_token_id
     def forward(
         self,
@@ -64,10 +67,9 @@ class MeasurementPredictorMixin(PreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
         )
-        flat_tensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
-        tensor_token_idxs = flat_tensor_token_idxs.view(-1, self.n_sensors)
         sensor_embs = base_model_output.last_hidden_state.gather(
-            1, tensor_token_idxs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
         )
         assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors, self.config.emb_dim), f"{sensor_embs.shape} != {(input_ids.shape[0], self.n_sensors, self.config.emb_dim)}"
         sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])

 import torch
 from torch.nn import BCEWithLogitsLoss
 from transformers import PreTrainedModel, PreTrainedTokenizer
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 from transformers.modeling_outputs import BaseModelOutputWithPast, SequenceClassifierOutputWithPast
+from .sensor_loc_reg import SENSOR_LOC_REGISTRY
+from .sensor_loc_finder import SensorLocFinder
 class MeasurementPredictorMixin(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
+        self.sensor_loc_type = config.sensor_loc_type
         self.sensor_token = config.sensor_token
         self.n_sensors = config.n_sensors
         self.sensor_probes = torch.nn.ModuleList([
             torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
             self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
         self.sensors_weight = config.sensors_weight
         self.aggregate_weight = config.aggregate_weight
+        self.get_sensor_locs: SensorLocFinder = None
+    def init_sensor_loc_finder(self, tokenizer: PreTrainedTokenizerBase):
+        self.get_sensor_locs = SENSOR_LOC_REGISTRY[self.sensor_loc_type](
+            tokenizer, sensor_token=self.sensor_token, n_sensors=self.n_sensors
+        )
     def forward(
         self,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
         )
+        sensor_locs = self.get_sensor_locs(input_ids)
         sensor_embs = base_model_output.last_hidden_state.gather(
+            1, sensor_locs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
         )
         assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors, self.config.emb_dim), f"{sensor_embs.shape} != {(input_ids.shape[0], self.n_sensors, self.config.emb_dim)}"
         sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])

sensor_loc_finder.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from abc import ABC, abstractmethod
+import torch
+from transformers import PreTrainedTokenizerBase
+# Abstract interface for objects that, given a batch of token ids, return the
+# positions ("sensor locations") at which the model should read hidden states.
+# Concrete implementations are looked up by name via SENSOR_LOC_REGISTRY.
+class SensorLocFinder(ABC):
+    # Subclasses must define their own constructor; it receives the tokenizer
+    # plus any finder-specific keyword arguments.
+    @abstractmethod
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, **kwargs):
+        pass
+    # Subclasses implement the actual lookup: input_ids tensor in, tensor of
+    # locations out.
+    # NOTE(review): the caller in modeling_measurement_pred.py unsqueezes the
+    # result and gathers along dim 1, so the result is presumably an integer
+    # tensor of shape (batch, n_sensors) -- confirm.
+    @abstractmethod
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        pass
+    # Makes an instance directly callable; simply forwards to find_sensor_locs.
+    def __call__(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.find_sensor_locs(input_ids)

sensor_loc_reg.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from enum import Enum
+from .sensor_loc_stories import StoriesSensorLocFinder
+from .sensor_locs_from_token import SensorLocFinderFromToken
+# Maps a sensor-location strategy name to the SensorLocFinder subclass that
+# implements it. modeling_measurement_pred.py indexes this dict with the
+# config's `sensor_loc_type` and instantiates the resulting class.
+SENSOR_LOC_REGISTRY = {
+    "stories": StoriesSensorLocFinder,
+    "locs_from_token": SensorLocFinderFromToken
+}

sensor_loc_stories.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import torch
+from transformers import PreTrainedTokenizerBase
+from .sensor_loc_finder import SensorLocFinder
+# Sensor-location finder that uses question-mark tokens appearing at or after
+# a "## Questions" header as the sensor positions.
+class StoriesSensorLocFinder(SensorLocFinder):
+    # Pre-computes the token ids this finder searches for.
+    # `kwargs` is accepted for interface compatibility but is not read here.
+    # NOTE(review): takes element [0] of each encode() result, so this assumes
+    # the tokenizer does not prepend special tokens -- confirm.
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, **kwargs):
+        self.questions_section_toks = tokenizer.encode("## Questions")
+        self.question_mark_tok = tokenizer.encode("?")[0]
+        self.other_question_mark_tok = tokenizer.encode(")?")[0]
+        # The header match in _is_sensor_loc compares exactly two adjacent tokens.
+        assert len(self.questions_section_toks) == 2
+    # Returns, for every row of `input_ids`, the indices of the 1st, 2nd and
+    # 3rd sensor positions flagged by _is_sensor_loc.
+    # NOTE(review): the count 3 is hard-coded here (assert and arange(1, 4));
+    # an `n_sensors` value passed to the constructor is not consulted.
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        device = input_ids.device
+        # Boolean mask of candidate sensor positions.
+        question_mark_locs = self._is_sensor_loc(input_ids)
+        # Running count of flagged positions along the sequence dimension.
+        total_locs = torch.cumsum(question_mark_locs, dim=-1)
+        # Last column of the running count == total flagged positions per row.
+        total_overall = total_locs[:, -1]
+        assert (
+            total_overall == 3
+        ).all(), "can handle different cases, but assuming this is easiest"
+        # eqs[b, t, k] is True where the running count at position t equals k+1.
+        eqs = total_locs[:, :, None] == torch.arange(1, 4)[None, None].to(device)
+        # argmax over the sequence dim picks an index where the count equals
+        # k+1 (presumably the first such index, i.e. the (k+1)-th flagged
+        # position -- relies on argmax returning the first maximum; confirm).
+        # Rows/columns with no match fall back to seq_len - 3, and every result
+        # is clamped to at most seq_len - 3.
+        locs = torch.where(
+            eqs.any(dim=-2),
+            torch.argmax(eqs.to(torch.uint8), dim=-2),
+            input_ids.shape[-1] - 3,
+        ).clamp(max=input_ids.shape[-1] - 3)
+        return locs
+    # Builds a boolean mask that is True at question-mark tokens located at or
+    # after the start of the two-token "## Questions" header.
+    def _is_sensor_loc(self, input_ids: torch.Tensor):
+        questions_section_toks = self.questions_section_toks
+        question_mark_tok = self.question_mark_tok
+        other_question_mark_tok = self.other_question_mark_tok
+        # True at position i when tokens i and i+1 match the two header tokens;
+        # this mask is one element shorter than the sequence.
+        eq_question_item = (input_ids[:, :-1] == questions_section_toks[0]) & (
+            input_ids[:, 1:] == questions_section_toks[1]
+        )
+        # Each row must contain the header exactly once.
+        assert (eq_question_item.sum(dim=-1, dtype=torch.int) == 1).all(), "could relax"
+        # Repeat the last column to restore the full sequence length, then take
+        # a running sum so every position from the header onward is > 0.
+        summed = torch.cumsum(
+            torch.cat([eq_question_item, eq_question_item[:, -1:]], dim=-1), dim=-1
+        )
+        return (summed > 0) & (
+            (input_ids == question_mark_tok) | (input_ids == other_question_mark_tok)
+        )

sensor_locs_from_token.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import torch
+from transformers import PreTrainedTokenizerBase
+from .sensor_loc_finder import SensorLocFinder
+# Sensor-location finder that treats every occurrence of a fixed sensor token
+# in the input as a sensor position.
+class SensorLocFinderFromToken(SensorLocFinder):
+    # Stores the id of the sensor token and the expected number of sensors.
+    # NOTE(review): only element [0] of tokenizer.encode(sensor_token) is kept,
+    # so this assumes the sensor token encodes to a single id and that the
+    # tokenizer does not prepend special tokens -- confirm.
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, sensor_token: str, n_sensors: int):
+        self.sensor_token_id = tokenizer.encode(sensor_token)[0]
+        self.n_sensors = n_sensors
+    # Returns the column indices of all sensor-token matches, reshaped to
+    # n_sensors columns.
+    # NOTE(review): the reshape only lines up with batch rows if every row of
+    # `input_ids` contains exactly n_sensors matches and nonzero() yields them
+    # in row-major order; otherwise view() may fail or misalign rows -- confirm.
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        # [1] selects the second-dimension (sequence position) indices of the matches.
+        flat_sensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
+        sensor_token_idxs = flat_sensor_token_idxs.view(-1, self.n_sensors)
+        return sensor_token_idxs

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,327 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "                               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "                              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "                             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "                            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "                           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "                          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "                         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50276": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50277": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50278": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50279": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50280": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50281": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50282": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50283": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50284": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50285": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50286": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50289": {
+      "content": "\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50290": {
+      "content": "\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50291": {
+      "content": "\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50292": {
+      "content": "\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50293": {
+      "content": "\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50294": {
+      "content": "\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "return_token_type_ids": false,
+  "tokenizer_class": "CodeGenTokenizer",
+  "truncation_side": "left",
+  "unk_token": "<|endoftext|>"
+}

train.log ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [2024-12-17 07:27:38,728][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
2	+ [2024-12-17 07:27:38,922][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:960624068298125ef58d6947bf92ea482b87512ea4ea911570cf8693922ebad6
+size 5112

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff