New report
Created on February 16 | Last edited on February 16
... 57 unchanged lines ...

  * large (possibly—not sure we can tokenize for it)

  GPT_RM = "stacey/model-registry/tinybird"
... 28 unchanged lines ...

+ model = load_model_variant("medium", model_type=MODEL_TYPE, run_prefix="resurrect_")

  to_tune = load_model_variant("medium")
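load_model_variant itself is not shown in this diff, so the following is only a sketch of what such a helper might do, assuming it pulls a GPT-2 checkpoint out of the GPT_RM registry path defined above; the artifact alias, the fallback model_type, and the way run_prefix is used are all guesses.

import wandb
from transformers import GPT2LMHeadModel

def load_model_variant(size, model_type="gpt2-medium", run_prefix=""):
    # Hypothetical body: assumes an active wandb run and size-named aliases
    # (e.g. "medium") on the registered model artifact.
    if run_prefix:
        wandb.run.name = f"{run_prefix}{size}"   # assumed: prefix tags the run name
    art = wandb.run.use_artifact(f"{GPT_RM}:{size}")   # e.g. tinybird:medium (assumed alias)
    model_dir = art.download()
    # model_type mirrors the call above; assumed to name the matching HF checkpoint
    return GPT2LMHeadModel.from_pretrained(model_dir).to("cuda")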
... 70 unchanged lines ...

+ NUM_SAMPLES = 10
  NUM_TOKENS = 200

  from transformers import Trainer
  from transformers import TrainingArguments

  training_args = TrainingArguments(
... 52 unchanged lines ...
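The body of the TrainingArguments call and the Trainer setup are collapsed above; purely as an illustration (none of these values come from the diff, and train_dataset is assumed to exist), a minimal continuation could look like:

# Illustrative placeholders only, not the settings hidden in the collapsed lines.
training_args = TrainingArguments(
    output_dir="gpt2-tune-output",        # placeholder path
    num_train_epochs=1,
    per_device_train_batch_size=2,
    logging_steps=50,
    report_to="wandb",                    # stream Trainer metrics to W&B
)
trainer = Trainer(model=to_tune, args=training_args, train_dataset=train_dataset)
trainer.train()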
- def gen_results(model, prompt, max_tokens=MAX_TOKENS, num_samples=NUM_SAMPLES, mode="pretty"):
+ def gen_results(model, prompt, max_tokens=MAX_TOKENS, num_samples=NUM_SAMPLES):
      device = "cuda"
      tokenizer = GPT2Tokenizer.from_pretrained(MODEL_TYPE)
      encoded_input = tokenizer(prompt, return_tensors='pt').to(device)
      x = encoded_input['input_ids']
      x = x.expand(num_samples, -1)
      y = model.generate(x, max_new_tokens=max_tokens, do_sample=True, top_k=40)
  ... 4 unchanged lines ...
-         html_result = pretty(prompt, out)
-         data.append([prompt, html_result])
-     else:
-         data.append([prompt, out])
+         data.append([prompt, out])
      return wandb.Table(data=data, columns=["prompt", "response"])

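As a usage sketch (not part of the diff): gen_results returns a wandb.Table, so a run can log the sampled generations directly. PROJECT, MODEL_TYPE, and a loaded model are assumed from earlier cells, and the job_type string is illustrative.

# Sketch only: assumes `model`, PROJECT, and MODEL_TYPE are defined above.
import wandb

wandb.init(project=PROJECT, job_type="sample-explore")   # illustrative job_type
table = gen_results(model, "Title: Chirality. Poem:", max_tokens=100, num_samples=5)
wandb.log({"sample_poems": table})
wandb.run.finish()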
  poem_titles = [
      "The Beach Plum",
      "The Inevitability of Side Effects",
      "The Edge of the World",
      "Chirality",
... 104 unchanged lines ...

- def infer(prompts, prompt_type, template,run_name="",notes=""):
-     wandb.init(project=PROJECT,name=run_name,job_type="live-explore")
+ def infer(prompts, prompt_type, template,notes=""):
+     wandb.init(project=PROJECT,job_type="live-explore")
      wandb.run.use_artifact("stacey/model-registry/tinybird:best_tuned")

      human_time, timestamp = get_timestamps()
      cfg = {
  ...
-     wandb.log({f"infer_{i}": table})
+     wandb.log({f"infer_live_{i}": table})
+     for j, row in enumerate(table.data):
+         print(f"Prompt:{row[0]}")
+         print(f"Response:{row[1]}")

      wandb.run.finish()
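get_timestamps() is referenced above but never defined in the visible part of the diff; a minimal hypothetical version, assuming it just returns a readable time string plus a raw timestamp for run naming and config logging:

# Hypothetical helper, not shown in the diff; the shape of the return values is assumed.
import time

def get_timestamps():
    now = time.time()
    human_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now))
    return human_time, int(now)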
  live_text = [
      "Title: The Fog. Poem:",
      "Title: The Oncoming Storm. Poem:",
  ...
- infer(mon_5, "compose-explore", "Title: X. {Poem,Lyrics,Article}:",run_name="mon_5_test_best_tune",
+ infer(mon_5, "compose-explore", "Title: X. {Poem,Lyrics,Article}:",
... 55 unchanged lines ...
      return wandb.Html(html)

  html = '<html><span style="color:#FF0000;background-color:green">H</span><span style="color:#66CC66;background-color:red">el</span><span style="color:#FF9966;background-color:yellow">lo</span></html>'
  wandb.init(project=PROJECT, name="test_html", job_type="test")
  wandb.log({"html" : wandb.Html(html)})
  wandb.run.finish()
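The hard-coded HTML above is just a rendering test; the same idea generalizes to a helper that wraps generated text segments in colored spans before logging, along the lines of this sketch (the helper name and the input format are made up, not from the diff):

# Hypothetical helper: builds the kind of colored-span HTML tested above.
import wandb

def color_spans(segments):
    # segments: list of (text, css_color) pairs, e.g. [("Hello", "#FF0000")]
    spans = "".join(f'<span style="color:{color}">{text}</span>' for text, color in segments)
    return wandb.Html(f"<html>{spans}</html>")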
- art = wandb.Api().artifact('stacey/wave/gpt2-med-tune-3-compose-long:v0')
... 77 unchanged lines ...

  wandb.init(project=PROJECT, name="upload-lyrics-test", job_type="upload")
  t = df.sample(frac=0.06)
  tab = wandb.Table(dataframe=t)
  wandb.run.log({"song_lyrics_005_test" : tab})
  wandb.run.finish()
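The run above only logs a 6% sample of the lyrics dataframe; as an optional follow-up sketch (the artifact and table names are placeholders, not from the diff), the full table could also be versioned as an artifact so the sample stays traceable:

# Sketch only: placeholder names, assumes `df` and PROJECT from the cells above.
wandb.init(project=PROJECT, name="upload-lyrics-artifact", job_type="upload")
art = wandb.Artifact("song_lyrics", type="dataset")
art.add(wandb.Table(dataframe=df), "full_lyrics")
wandb.run.log_artifact(art)
wandb.run.finish()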
  wandb.init(project=PROJECT, name="upload-all-788", job_type="upload")
... 30 unchanged lines ...