Pakistan's First Oracle Blog

Blog by Fahd Mirza Chughtai

Install Indic Parler-TTS model Locally

Tue, 2024-12-03 22:49

This video shows how to locally install Indic Parler-TTS, which can officially generate speech in 20 Indic languages.





Code:

conda create -n ai python=3.11 -y && conda activate ai

sudo apt-get install libportaudio2
conda install -c anaconda pyaudio

pip install torch torchaudio einops timm pillow
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/accelerate
pip install git+https://github.com/huggingface/diffusers
pip install huggingface_hub
pip install sentencepiece bitsandbytes protobuf decord
pip install librosa peft numpy

pip install git+https://github.com/huggingface/parler-tts.git


conda install -c conda-forge --override-channels notebook -y
conda install -c conda-forge --override-channels ipywidgets -y
jupyter notebook

import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts").to(device)
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)

prompt = "अरे, तुम आज कैसे हो?"
description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."

input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
Categories: DBA Blogs

Install HunyuanVideo Model Locally for Text to Video Generation

Tue, 2024-12-03 15:52

This video shows how to locally install the HunyuanVideo AI model for long text-to-video generation.


Code:

git clone https://github.com/tencent/HunyuanVideo && cd HunyuanVideo

conda env create -f environment.yml

conda activate HunyuanVideo

conda install gcc_linux-64 gxx_linux-64 -y
conda install cuda -c nvidia -y

python -m pip install -r requirements.txt

pip install packaging
pip uninstall -y ninja && pip install ninja

python -m pip install git+https://github.com/Dao-AILab/flash-attention.git@v2.5.9.post1

huggingface-cli login  #get Read token from huggingface.co

huggingface-cli download tencent/HunyuanVideo --local-dir ./ckpts

cd ckpts

huggingface-cli download xtuner/llava-llama-3-8b-v1_1-transformers --local-dir ./llava-llama-3-8b-v1_1-transformers

cd ..

python hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py --input_dir ckpts/llava-llama-3-8b-v1_1-transformers --output_dir ckpts/text_encoder

cd ckpts
huggingface-cli download openai/clip-vit-large-patch14 --local-dir ./text_encoder_2

cd ..

python3 sample_video.py \
    --video-size 720 1280 \
    --video-length 129 \
    --infer-steps 30 \
    --prompt "a cat is running, realistic." \
    --flow-reverse \
    --seed 0 \
    --use-cpu-offload \
    --save-path ./results



Categories: DBA Blogs

Embedding Model in Oracle Database 23ai - Step-by-step Hands-on Tutorial

Mon, 2024-12-02 21:54


This is a step-by-step hands-on tutorial to use Oracle AI Vector Search on unstructured data combined with relational search on business data.



conda create -n ai python=3.11 -y && conda activate ai

sudo chmod 666 /var/run/docker.sock

docker pull container-registry.oracle.com/database/free:latest

docker run -d --name oracle-db \
  -p 1521:1521 \
  --dns 8.8.8.8 \
  -e ORACLE_PWD="YourStrongPassword" \
  container-registry.oracle.com/database/free:latest

docker ps -a

mkdir mymodel && cd mymodel

wget https://adwc4pm.objectstorage.us-ashburn-1.oci.customer-oci.com/p/VBRD9P8ZFWkKvnfhrWxkpPe8K03-JIoM5h_8EJyJcpE80c108fuUjg7R5L5O7mMZ/n/adwc4pm/b/OML-Resources/o/all_MiniLM_L12_v2_augmented.zip

wget 'https://huggingface.co/datasets/muhrafli/heart-diseases/resolve/main/heart%20(3).csv'
mv 'heart (3).csv' heart.csv

unzip -oq all_MiniLM_L12_v2_augmented.zip

docker cp all_MiniLM_L12_v2.onnx oracle-db:/home/oracle/
docker cp heart.csv oracle-db:/home/oracle/

docker exec -it oracle-db bash

mkdir mymodel
mv all_MiniLM_L12_v2.onnx mymodel
mv heart.csv mymodel
cd mymodel

sqlplus sys/YourStrongPassword@localhost:1521/FREEPDB1 as sysdba

create user if not exists myuser identified by myuser quota unlimited on users;
grant create session, db_developer_role, create mining model to myuser;

create or replace directory model_dir as '/home/oracle/mymodel';
grant read, write on directory model_dir to myuser;

begin
  dbms_vector.drop_onnx_model (
    model_name => 'ALL_MINILM_L12_V2',
    force => true);

  dbms_vector.load_onnx_model (
    directory  => 'model_dir',
    file_name  => 'all_MiniLM_L12_v2.onnx',
    model_name => 'ALL_MINILM_L12_V2');
end;
/

column model_name format a30
column algorithm format a10
column mining_function format a15

select model_name, algorithm, mining_function
from   user_mining_models
where  model_name = 'ALL_MINILM_L12_V2';

set long 1000000
select vector_embedding(all_minilm_l12_v2 using 'hello' as data) AS my_vector;


-- Create table
drop table if exists heart_disease_data purge;

create table heart_disease_data as
select age, sex, chest_pain_type, resting_bp, cholesterol, fasting_bs, resting_ecg, max_hr, exercise_angina, oldpeak, st_slope, heart_disease
from   external (
         (
           age            number(3),
           sex            varchar2(10),
           chest_pain_type varchar2(10),
           resting_bp     number(5),
           cholesterol    number(10),
           fasting_bs     number(1),
           resting_ecg    varchar2(10),
           max_hr         number(5),
           exercise_angina varchar2(10),
           oldpeak        number(10,1),
           st_slope       varchar2(10),
           heart_disease  number(1)
         )
         type oracle_loader
         default directory model_dir
         access parameters (
           records delimited by newline
           skip 1
           badfile model_dir
           logfile model_dir:'heart_disease_data_ext_tab_%a_%p.log'
           discardfile model_dir
           fields csv with embedded terminated by ',' optionally enclosed by '"'
           missing field values are null
           (
             age,
             sex,
             chest_pain_type,
             resting_bp,
             cholesterol,
             fasting_bs,
             resting_ecg,
             max_hr,
             exercise_angina,
             oldpeak,
             st_slope,
             heart_disease
           )
        )
        location ('heart.csv')
        reject limit unlimited
      );

-- Describe table
desc heart_disease_data;

-- Add vector column
alter table heart_disease_data add (
  patient_vector vector
);

-- Describe table
desc heart_disease_data;

-- Populate vector column
update heart_disease_data
set    patient_vector = vector_embedding(all_minilm_l12_v2 using age || sex || chest_pain_type || resting_bp || cholesterol || fasting_bs || resting_ecg || max_hr || exercise_angina || oldpeak || st_slope as data);

commit;


-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with high cholesterol and high blood pressure"

variable search_text varchar2(100);
exec :search_text := 'patient with high cholesterol and high blood pressure';

set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9

SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
       age,
       sex,
       chest_pain_type,
       resting_bp,
       cholesterol,
       fasting_bs,
       resting_ecg,
       max_hr,
       exercise_angina,
       oldpeak,
       st_slope,
       heart_disease
FROM   heart_disease_data
order by 1
fetch approximate first 5 rows only;



-- Vector Search using VECTOR_DISTANCE
-- Search for patients with similar characteristics to "patient with chest pain and high heart rate"

variable search_text varchar2(100);
exec :search_text := 'patient with chest pain and high heart rate';

set linesize 200
column age format 999
column sex format a10
column chest_pain_type format a10
column resting_bp format 99999
column cholesterol format 9999999
column fasting_bs format 9
column resting_ecg format a10
column max_hr format 99999
column exercise_angina format a10
column oldpeak format 99999.9
column st_slope format a10
column heart_disease format 9

SELECT vector_distance(patient_vector, (vector_embedding(all_minilm_l12_v2 using :search_text as data))) as distance,
       age,
       sex,
       chest_pain_type,
       resting_bp,
       cholesterol,
       fasting_bs,
       resting_ecg,
       max_hr,
       exercise_angina,
       oldpeak,
       st_slope,
       heart_disease
FROM   heart_disease_data
order by 1
fetch approximate first 5 rows only;


========================
Cleanup:

docker stop oracle-db && docker rm oracle-db

docker images

docker rmi <image_id>
Categories: DBA Blogs

Create a Free Local AI Dungeon Game with Ollama

Sat, 2024-11-30 22:20

This video shows how to easily create a Dungeons & Dragons-style game with the help of local models served by Ollama.


Code:


conda create -n ai python=3.10 -y && conda activate ai

mkdir mygame && cd mygame

pip install pydantic==2.8.2 gradio==4.44.1 ollama

system_prompt = f"""
Your job is to help create interesting futuristic worlds that \
players would love to explore.
Instructions:
- Only generate in plain text without formatting.
- Use simple clear language without being overly technical.
- You must stay below 3-5 sentences for each description.
"""

world_prompt = f"""
Generate a creative description for a unique futuristic world with an
interesting concept around humans colonizing new planets in a distant galaxy.

Output content in the form:
World Name: <WORLD NAME>
World Description: <WORLD DESCRIPTION>

World Name:"""

import os
import json
from ollama import chat
from ollama import ChatResponse

response: ChatResponse = chat(model='llama3.2',
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": world_prompt}
    ]
)

world_output = response['message']['content']
print(world_output)

world_output = world_output.strip()
world = {
    "name": world_output.split('\n')[0].strip()
    .replace('World Name: ', ''),
    "description": '\n'.join(world_output.split('\n')[1:])
    .replace('World Description:', '').strip()
}

kingdom_prompt = f"""
Create 3 different colonies for a futuristic world.
For each colony describe the leaders, societal structures, and notable achievements.

Output content in the form:
Colony 1 Name: <COLONY NAME>
Colony 1 Description: <COLONY DESCRIPTION>
Colony 2 Name: <COLONY NAME>
Colony 2 Description: <COLONY DESCRIPTION>
Colony 3 Name: <COLONY NAME>
Colony 3 Description: <COLONY DESCRIPTION>

World Name: {world['name']}
World Description: {world['description']}

Colony 1"""

response: ChatResponse = chat(model='llama3.2',
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": kingdom_prompt}
    ]
)

kingdoms = {}
kingdoms_output = response['message']['content']

for output in kingdoms_output.split('\n\n'):
  kingdom_name = output.strip().split('\n')[0] \
    .split('Name: ')[1].strip()
  print(f'Created colony "{kingdom_name}" in {world["name"]}')
  kingdom_description = output.strip().split('\n')[1] \
    .split('Description: ')[1].strip()
  kingdom = {
      "name": kingdom_name,
      "description": kingdom_description,
      "world": world['name']
  }
  kingdoms[kingdom_name] = kingdom
world['kingdoms'] = kingdoms

print(f'\nColony 1 Description: \
{kingdom["description"]}')


def get_town_prompt(world, kingdom):
    return f"""
    Create 3 different starports for a futuristic colony and world. \
    Describe the region they're in, important facilities, \
    and notable history.
   
    Output content in the form:
    Starport 1 Name: <STARPORT NAME>
    Starport 1 Description: <STARPORT DESCRIPTION>
    Starport 2 Name: <STARPORT NAME>
    Starport 2 Description: <STARPORT DESCRIPTION>
    Starport 3 Name: <STARPORT NAME>
    Starport 3 Description: <STARPORT DESCRIPTION>
   
    World Name: {world['name']}
    World Description: {world['description']}
   
    Colony Name: {kingdom['name']}
    Colony Description {kingdom['description']}
   
    Starport 1 Name:"""


def create_towns(world, kingdom):
    print(f'\nCreating starports for colony: {kingdom["name"]}...')
    response: ChatResponse = chat(model='llama3.2',
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_town_prompt(world, kingdom)}
        ]
    )  
    towns_output = response['message']['content']
   
    towns = {}
    for output in towns_output.split('\n\n'):
        town_name = output.strip().split('\n')[0]\
        .split('Name: ')[1].strip()
        print(f'- {town_name} created')
       
        town_description = output.strip().split('\n')[1]\
        .split('Description: ')[1].strip()
       
        town = {
          "name": town_name,
          "description": town_description,
          "world": world['name'],
          "kingdom": kingdom['name']
        }
        towns[town_name] = town
    kingdom["towns"] = towns
   
for kingdom in kingdoms.values():
    create_towns(world, kingdom)  

town = list(kingdom['towns'].values())[0]
print(f'\nStarport 1 Description: \
{town["description"]}')

def get_npc_prompt(world, kingdom, town):
    return f"""
    Create 3 different characters based on the world, colony, \
    and starport they're in. Describe the character's appearance and \
    role, as well as their motivations and challenges.
   
    Output content in the form:
    Character 1 Name: <CHARACTER NAME>
    Character 1 Description: <CHARACTER DESCRIPTION>
    Character 2 Name: <CHARACTER NAME>
    Character 2 Description: <CHARACTER DESCRIPTION>
    Character 3 Name: <CHARACTER NAME>
    Character 3 Description: <CHARACTER DESCRIPTION>
   
    World Name: {world['name']}
    World Description: {world['description']}
   
    Colony Name: {kingdom['name']}
    Colony Description: {kingdom['description']}
   
    Starport Name: {town['name']}
    Starport Description: {town['description']}
   
    Character 1 Name:"""

def create_npcs(world, kingdom, town):
    print(f'\nCreating characters for the starport of: {town["name"]}...')
    response: ChatResponse = chat(model='llama3.2',
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_npc_prompt(world, kingdom, town)}
        ]
    )
       
    npcs_output = response['message']['content']
    npcs = {}
    for output in npcs_output.split('\n\n'):
        lines = output.strip().split('\n')
        if len(lines) < 2:
            print(f"Warning: skipping invalid NPC output - {output}")
            continue
        npc_name_line = lines[0]
        if "Name: " not in npc_name_line:
            print(f"Warning: skipping invalid NPC output - {output}")
            continue
        npc_name = npc_name_line.split('Name: ')[1].strip()
        npc_description = ""
        for line in lines[1:]:
            if "Description: " in line:
                npc_description = line.split('Description: ')[1].strip()
            elif "Motivations and Challenges: " in line:
                npc_description += "\n" + line.split('Motivations and Challenges: ')[1].strip()
        print(f'- "{npc_name}" created')
       
        npc = {
        "name": npc_name,
        "description": npc_description,
        "world": world['name'],
        "kingdom": kingdom['name'],
        "town": town['name']
        }
        npcs[npc_name] = npc
    town["npcs"] = npcs


for kingdom in kingdoms.values():
    for town in kingdom['towns'].values():
        create_npcs(world, kingdom, town)
    # For now we'll only generate npcs for one kingdom
    break

npc = list(town['npcs'].values())[0]

print(f'\nNPC 1 in {town["name"]}, \
{kingdom["name"]}:\n{npc["description"]}')
   

def save_world(world, filename):
    with open(filename, 'w') as f:
        json.dump(world, f)

def load_world(filename):
    with open(filename, 'r') as f:
        return json.load(f)

save_world(world, 'MyWorld.json')

import gradio as gr
import os
demo = None #added to allow restart

def start_game(main_loop, share=False):
    # added code to support restart
    global demo
    # If demo is already running, close it first
    if demo is not None:
        demo.close()

    demo = gr.ChatInterface(
        main_loop,
        chatbot=gr.Chatbot(height=250, placeholder="Type 'start game' to begin"),
        textbox=gr.Textbox(placeholder="What do you do next?", container=False, scale=7),
        title="AI RPG",
        # description="Ask Yes Man any question",
        theme="soft",
        examples=["Look around", "Continue the story"],
        cache_examples=False,
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
                           )
    demo.launch(share=share, server_name="0.0.0.0")

def test_main_loop(message, history):
    return 'Entered Action: ' + message

start_game(test_main_loop)


world = load_world('MyWorld.json')
kingdom = world['kingdoms']['Aurora Isles']
town = kingdom['towns']["Helios Landing"]
character = town['npcs']['Dr. Lyra Flynn']

system_prompt = """You are an AI Game master. Your job is to create a
start to an adventure based on the world, colony, starport, and character
a player is playing as.
Instructions:
You must only use 2-4 sentences \
Write in second person. For example: "You are Alex" \
Write in present tense. For example "You are standing..." \
First describe the character and their background. \
Then describe where they start and what they see around them."""
world_info = f"""
World: {world}
Kingdom: {kingdom}
Town: {town}
Your Character: {character}
"""

response: ChatResponse = chat(model='llama3.2',
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": world_info + '\nYour Start:'}
    ]
)

start = response['message']['content']
print(start)
world['start'] = start
save_world(world, 'MyWorld.json')


def run_action(message, history, game_state):
   
    if(message == 'start game'):
        return game_state['start']

    system_prompt = """You are an AI Game master. Your job is to write what \
happens next in a player's adventure game.\
Instructions: \
You must only write 1-3 sentences in response. \
Always write in second person present tense. \
Ex. (You approach the control panel...)"""
   
    world_info = f"""
World: {game_state['world']}
Kingdom: {game_state['kingdom']}
Town: {game_state['town']}
Your Character:  {game_state['character']}"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": world_info}
    ]
    # Gradio tuple history stores (player message, game master reply) pairs
    for action in history:
        messages.append({"role": "user", "content": action[0]})
        messages.append({"role": "assistant", "content": action[1]})

    messages.append({"role": "user", "content": message})

    response: ChatResponse = chat(model='llama3.2',
        messages=messages
    )
   
    result = response['message']['content']
    return result
   
       
game_state = {
    "world": world['description'],
    "kingdom": kingdom['description'],
    "town": town['description'],
    "character": character['description'],
    "start": start,
}

def main_loop(message, history):
    return run_action(message, history, game_state)
Categories: DBA Blogs

Free ComfyUI WorkFlows for Various AI Models

Sat, 2024-11-23 00:59

ComfyUI is a user-friendly, node-based interface for Stable Diffusion. It allows you to create custom image generation workflows by connecting different functional blocks, known as "nodes".

Following is the link to the collection of free ComfyUI workflows I use in my YouTube channel videos:

fahdmirza/comfyuiworkflows


I hope that helps. 

Categories: DBA Blogs

Favorite Feature in Oracle 23ai

Sun, 2024-11-17 21:35

 In today's data-driven world, businesses rely on robust databases to manage their mission-critical workloads. Oracle Database 23ai Free offers a streamlined experience of this industry-leading database, with resource limits of up to 2 CPUs for foreground processes, 2 GB of RAM, and 12 GB of user data on disk. This free version is designed for ease of use and simple download, making it an ideal starting point for exploring the capabilities of Oracle Database.


A key feature that sets Oracle Database 23ai apart is its AI Vector Search capability. But what exactly are vectors? In simple terms, vectors are mathematical representations of data that capture complex relationships and patterns. They are a way to encode data, such as text, images, or audio, into numerical values that can be easily processed and analyzed by machines. Vectors enable computers to understand the semantic meaning and context of data, allowing for more accurate and efficient searching and analysis.


Vector search takes this concept a step further. It is a technique used to quickly identify similar data points within a vast dataset. Traditional search methods rely on keyword matching or exact phrase searches, but vector search enables more nuanced and intuitive queries. By comparing the vector representations of different data points, vector search can identify patterns and relationships that would be missed by traditional search methods.

Oracle AI Vector Search builds on this technology, introducing a converged database capability that revolutionizes the way businesses interact with their data. 


By storing vectors as a native data type and utilizing vector indexes and SQL functions, AI Vector Search enables fast and simple similarity search queries on both structured and unstructured data. This means that customers can quickly identify similar information across documents, images, and other unstructured data sources. Furthermore, AI Vector Search allows prompts to large language models (LLMs) to be augmented with private business data or domain knowledge, unlocking new possibilities for data-driven insights and decision-making.
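
Here is a minimal sketch of what that looks like in SQL. The products table, its columns, and the my_doc_model embedding model are illustrative assumptions (the model would need to be loaded into the database first, for example with DBMS_VECTOR.LOAD_ONNX_MODEL), not a prescribed implementation:

-- a table with a native VECTOR column (names are hypothetical)
create table products (
  id          number primary key,
  description varchar2(400),
  embedding   vector
);

-- populate the vector column using an in-database embedding model
update products
set    embedding = vector_embedding(my_doc_model using description as data);

-- similarity search: the five products closest to a search phrase
select id, description
from   products
order by vector_distance(embedding,
                         vector_embedding(my_doc_model using 'wireless headphones' as data))
fetch first 5 rows only;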


With Oracle AI Vector Search, businesses can unlock the full potential of their data, uncovering hidden patterns and relationships that drive innovation and growth. Whether you're working with text, images, or other data types, Oracle Database 23ai's AI Vector Search capability is poised to transform the way you search, analyze, and interact with your data.

Installation:

Install it with Docker:

docker pull container-registry.oracle.com/database/free:latest

Install it with Oracle VirtualBox:

Oracle_Database_23ai_Free_Developer.ova

Install it with Linux / Windows:

oracle-database-free-23ai-1.0-1.el8.x86_64.rpm
WINDOWS.X64_236000_free.zip

Connecting to Oracle Database Free:

For PDB: sqlplus sys@localhost:1521/FREEPDB1 as sysdba
For CDB: sqlplus sys@localhost:1521/FREE as sysdba

In Python:

import oracledb

conn = oracledb.connect(user="[Username]", password="[Password]", dsn="localhost:1521/FREEPDB1")
with conn.cursor() as cur:
   cur.execute("SELECT 'Hello World!' FROM dual")
   res = cur.fetchall()
   print(res)
   
In Go:

package main
     
import (
      "fmt"
      "log"
      "database/sql"
      _ "github.com/godror/godror"
)
     
func main() {  
     
      // connectString format: [hostname]:[port]/[DB service name]
     
      dsn := `user="[Username]"
              password="[Password]"
              connectString="localhost:1521/FREEPDB1"`  
     
      db, err := sql.Open("godror", dsn)
      if err != nil {
        panic(err)
      }
      defer db.Close()
     
      rows, err := db.Query("SELECT 'Hello World!' FROM dual")
      if err != nil {
        panic(err)
      }
      defer rows.Close()
     
      var strVal string
      for rows.Next() {
        err := rows.Scan(&strVal)
        if err != nil {
          log.Fatal(err)
        }
        fmt.Println(strVal)
      }
      err = rows.Err()
      if err != nil {
        log.Fatal(err)
      }
     
}  
Categories: DBA Blogs

Oracle Database 23ai and GraphQL

Sun, 2024-11-17 17:12

In today's data-driven world, AI needs fuel to power innovative applications. Oracle Database 23ai brings AI directly to your data, making it effortless to develop cutting-edge apps and tackle mission-critical tasks. But what makes this possible? Enter GraphQL, a query language that is changing the way we interact with data.

GraphQL is an open-source data query and manipulation language developed internally at Facebook in 2012 and open-sourced in 2015. It allows clients to specify exactly what data they need, eliminating unnecessary requests and improving performance. GraphQL's declarative nature makes it a perfect fit for modern, data-driven applications. Since its release it has seen widespread adoption by tech giants like GitHub, Pinterest, and Airbnb.

GraphQL solves several pain points that have plagued developers for years. By allowing clients to receive only requested data, GraphQL reduces data transfer and minimizes bandwidth usage. This results in improved performance, as fewer requests and optimized data retrieval lead to faster responses. Additionally, GraphQL supports multiple data sources, making integration seamless. Its self-documenting nature and intuitive queries simplify development, making it a favorite among developers.

Traditionally, relational databases like Oracle used SQL for querying. However, SQL can be restrictive, requiring multiple queries to fetch related data. GraphQL changes this by enabling simplified complex queries and real-time data retrieval. This makes it perfect for applications requiring instant updates. Oracle's integration of GraphQL into its database takes this power to the next level, offering native support, optimized queries, and robust security features.
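
For contrast, fetching a movie-with-cast shape in plain SQL usually means joining several tables and then reshaping the flat rows into nested objects in application code. The movies, cast_members, and actors tables below are hypothetical, just to illustrate the point:

-- Flat relational result; the client must nest rows into movie -> cast itself
SELECT m.title,
       m.director,
       a.actor_name,
       c.character_name
FROM   movies m
       JOIN cast_members c ON c.movie_id = m.id
       JOIN actors a ON a.id = c.actor_id;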

With Oracle Database 23ai and GraphQL, developers can build innovative applications faster and more efficiently. GraphQL's nested queries and relationships make fetching complex data easier, while Oracle's database engine optimizes queries for peak performance. This powerful combination enables developers to focus on building exceptional user experiences.

Imagine querying a movie database to get personalized recommendations. With GraphQL, you can fetch exactly what you need. For example:

Here are some examples:

Query 1: Get Movie Details

query Movies {
    movies {
        title
        director
        genres
        cast {
            actor_name
            character_name
        }
    }
}

Query 2: Find Movies by Actor

query MoviesByActor {
    movies {
        title
        release_year
        actors {
            actor_name
            movies_actor_id {
                title
            }
        }
    }
}

Query 3: Discover Movie Recommendations

query Recommendations {
    movies {
        title
        rating
        similar_movies {
            title
            genre
        }
    }
}


These examples illustrate the potential of Oracle Database 23ai and GraphQL. By combining AI-powered data analysis with intuitive querying, developers can unlock new possibilities in application development.


With Oracle Database 23ai and GraphQL, building innovative movie apps is faster, easier, and more powerful than ever.


Hope this helps.

Categories: DBA Blogs

Session Monitoring and Session Cleanup in Oracle

Sat, 2024-11-16 23:53

 As an Oracle database administrator, managing sessions is crucial for maintaining performance and availability. This script provides a comprehensive solution for monitoring and cleaning up idle and blocking sessions.


The script identifies blocking sessions and idle sessions that exceed a specified threshold (default: 60 minutes), kills them, and sends notification emails. Key components include session identification using V$SESSION, V$PROCESS, and V$TRANSACTION; threshold settings; notification email functionality using TRACK.SEND_EMAIL; and error handling.

To implement this script, you'll need to declare variables for threshold settings (minutes), notification lists, and other necessary variables. The script then monitors blocking sessions using a FOR loop, killing each blocking session and sending notifications. A similar loop monitors idle sessions.

DECLARE
  -- Threshold settings (minutes)
  in_blocker_threshold_minutes NUMBER := 60;
  in_idle_threshold_minutes NUMBER := 60;
 
  -- Notification list
  in_notification_list VARCHAR2(100) := 'your_email@example.com';
 
  -- Other variables
  v_Body CLOB;
  any_blockers_killed NUMBER := 0;
  any_idlers_killed NUMBER := 0;

  -- ORA-00031: session marked for kill
  marked_for_kill EXCEPTION;
  PRAGMA EXCEPTION_INIT(marked_for_kill, -31);
 
BEGIN
  -- Monitor blocking sessions
  FOR bses IN (
    SELECT s.sid, s.serial#, p.spid, s.username, s.program, machine, osuser,
           logon_time, last_call_et,
           NVL(sbc.ses_blocking_cnt, 0) ses_blocking_cnt,
           NVL2(t.used_ublk, TO_CHAR(used_ublk), 'none') used_ublk, sa.sql_text last_command
    FROM v$session s, v$process p, v$transaction t, v$sqlarea sa,
         (SELECT blocking_session, COUNT(*) ses_blocking_cnt FROM v$session
          WHERE blocking_session IS NOT NULL GROUP BY blocking_session) sbc
    WHERE s.paddr = p.addr
      AND s.taddr = t.addr (+)
      AND s.sql_id = sa.sql_id (+)
      AND s.sid = sbc.blocking_session
      AND last_call_et > in_blocker_threshold_minutes * 60
      AND s.username IS NOT NULL
      AND s.type <> 'BACKGROUND'
      AND s.blocking_session IS NULL
  ) LOOP
    -- Kill blocking session and send notification
    BEGIN
      EXECUTE IMMEDIATE 'ALTER SYSTEM KILL SESSION ''' || bses.sid || ',' || bses.serial# || ''' IMMEDIATE';
      any_blockers_killed := 1;
    EXCEPTION
      WHEN MARKED_FOR_KILL THEN
        DBMS_OUTPUT.PUT_LINE(bses.sid || ',' || bses.serial# || ' marked for kill.');
    END;
  END LOOP;
 
  -- Monitor idle sessions
  FOR ises IN (
    SELECT s.sid, s.serial#, p.spid, s.username, s.program, machine, osuser,
           logon_time, last_call_et,
           NVL(sbc.ses_blocking_cnt, 0) ses_blocking_cnt,
           NVL2(t.used_ublk, TO_CHAR(used_ublk), 'none') used_ublk, sa.sql_text last_command
    FROM v$session s, v$process p, v$transaction t, v$sqlarea sa,
         (SELECT blocking_session, COUNT(*) ses_blocking_cnt FROM v$session
          WHERE blocking_session IS NOT NULL GROUP BY blocking_session) sbc
    WHERE s.paddr = p.addr
      AND s.taddr = t.addr (+)
      AND s.sql_id = sa.sql_id (+)
      AND s.sid = sbc.blocking_session (+)
      AND last_call_et > in_idle_threshold_minutes * 60
      AND s.username IS NOT NULL
      AND s.type <> 'BACKGROUND'
  ) LOOP
    -- Kill idle session and send notification
    BEGIN
      EXECUTE IMMEDIATE 'ALTER SYSTEM KILL SESSION ''' || ises.sid || ',' || ises.serial# || ''' IMMEDIATE';
      any_idlers_killed := 1;
    EXCEPTION
      WHEN MARKED_FOR_KILL THEN
        DBMS_OUTPUT.PUT_LINE(ises.sid || ',' || ises.serial# || ' marked for kill.');
    END;
  END LOOP;
 
  -- Send notification emails
  IF any_blockers_killed = 1 OR any_idlers_killed = 1 THEN
    TRACK.SEND_EMAIL('oracle@your_host', in_notification_list, 'Killed sessions on your_instance', '<pre>' || v_Body || '</pre>');
  END IF;
 
EXCEPTION
  WHEN OTHERS THEN
    DBMS_OUTPUT.PUT_LINE('Error checking idle and blocking sessions in your_instance');
    TRACK.SEND_EMAIL('oracle@your_host', in_notification_list, 'Error checking idle and blocking sessions in your_instance', '<pre>' || SQLERRM || '</pre>');
    RAISE;
END;
/


To maximize the effectiveness of this script, consider the following best practices:

  • Schedule the script to run regularly (e.g., every 30 minutes); a DBMS_SCHEDULER sketch follows this list.
  • Adjust threshold settings according to your database requirements.
  • Monitor notification emails for killed sessions.
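
For the scheduling itself, a minimal DBMS_SCHEDULER sketch could look like the following; the job name and 30-minute interval are illustrative, and job_action would carry the anonymous block shown above:

BEGIN
  DBMS_SCHEDULER.CREATE_JOB(
    job_name        => 'KILL_IDLE_AND_BLOCKING_SESSIONS',  -- illustrative name
    job_type        => 'PLSQL_BLOCK',
    job_action      => q'[BEGIN
                            -- paste the body of the anonymous block shown above here
                            NULL;
                          END;]',
    start_date      => SYSTIMESTAMP,
    repeat_interval => 'FREQ=MINUTELY;INTERVAL=30',
    enabled         => TRUE);
END;
/
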
Hope this helps.

Categories: DBA Blogs

Tencent Hunyuan3D-1 - Install Locally - 3D Generation AI Model from Text

Mon, 2024-11-11 15:21

 This video shows how to locally install Tencent Hunyuan3D-1 model for 3D generation.



Code:

conda create -n ai python=3.9 -y && conda activate ai

conda remove cuda-compiler
conda install gcc_linux-64=11.2.0 gxx_linux-64=11.2.0 -y

conda install cuda=11.8 -c nvidia -y

conda install pytorch=2.0.1 torchvision==0.15.2 pytorch-cuda=11.8 -c pytorch -c nvidia -y

conda install -c iopath iopath -y
conda install -c bottler nvidiacub -y
conda install pytorch3d -c pytorch3d -y
conda install anaconda::cmake -y
conda install conda-forge::lit-nlp -y
conda install anaconda::numpy=1.23.5 -y

git clone https://github.com/tencent/Hunyuan3D-1 && cd Hunyuan3D-1


# Before running env_install.sh below, remove the pytorch3d line from it (pytorch3d was already installed via conda above).
chmod a+x env_install.sh
./env_install.sh

pip install huggingface_hub
huggingface-cli login  


mkdir weights
huggingface-cli download tencent/Hunyuan3D-1 --local-dir ./weights

mkdir weights/hunyuanDiT
huggingface-cli download Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled --local-dir ./weights/hunyuanDiT

python3 main.py --text_prompt "a lovely rabbit" --save_folder ./outputs/test/ --max_faces_num 90000 --do_texture_mapping --do_render

python3 main.py --image_prompt "/home/Ubuntu/images/komodo.png" --save_folder ./outputs/test/ --max_faces_num 90000 --do_texture_mapping --do_render
Categories: DBA Blogs

How-To Resolve Enqueue Errors in Any Version of Oracle Database

Mon, 2024-11-04 23:45

 As an Oracle database administrator, you've likely encountered errors that make your heart skip a beat. One such error is ORA-00240: control file enqueue held for more than 120 seconds. But before you panic, let's break down what this error means and how to address it.

This error occurs when the control file enqueue is held for an extended period (over 120 seconds). The control file is a critical component of the Oracle database, managing database structure and integrity.

If you see this error occasionally, and your instance remains up and running, it's likely a fleeting glitch. Ignore it and move on.

However, if:

  • The error occurs frequently
  • Your instance hangs or crashes
  • Performance is severely impacted

You need to be worried about it.

In my experience, ORA-00240 can be triggered by:

  • High session counts conflicting with OS ulimits
  • Shared pool latch contention (as noted in some MOS documents)
  • Bugs in the Oracle software (resolvable with PSUs or one-off patches)

You should check the following:

  • Check alert logs for frequency and patterns.
  • Verify OS ulimits are adequately set (the V$RESOURCE_LIMIT query after the latch query below helps compare instance usage against its limits).
  • Monitor shared pool latch contention using a query like this:

SELECT
  NAME,
  GETS,
  MISSES,
  SLEEPS,
  IMMEDIATE_GETS,
  IMMEDIATE_MISSES
FROM
  V$LATCH
WHERE
  NAME = 'shared pool';
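
As a cross-check on the ulimit point above, V$RESOURCE_LIMIT shows how close the instance is to its own process and session limits, which is useful to compare against the OS settings:

SELECT resource_name,
       current_utilization,
       max_utilization,
       limit_value
FROM   v$resource_limit
WHERE  resource_name IN ('processes', 'sessions');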

Don't panic over occasional ORA-00240 errors. However, frequent occurrences warrant immediate attention. By understanding the causes and taking proactive steps, you'll minimize downtime and ensure your Oracle database runs smoothly.

Categories: DBA Blogs

Troubleshooting ORA-1652 by Identifying Temporary Segment Usage

Fri, 2024-11-01 00:39

I still get a bit anxious when I receive an ORA-1652 error in a production database, as it can be a hard nut to crack. Encountering ORA-1652 errors can be frustrating, especially when dealing with temporary segment usage. To quickly identify the root cause, use the following query to analyze temporary segment allocation:

SELECT
  sql_id,
  SUM(temp_space_allocated)/1024/1024 AS temp_space_MB
FROM
  dba_hist_active_sess_history
WHERE
  sample_time BETWEEN TIMESTAMP '2020-06-25 19:30:00' AND TIMESTAMP '2020-06-25 20:00:00'
GROUP BY
  sql_id
ORDER BY
  2 DESC;


  

To further investigate:

  • Real-Time Session Monitoring: Use Oracle Enterprise Manager (EM) or query V$SESSION to identify active sessions consuming temporary space.
  • Temporary Segment Usage: Query V$TEMPSEG_USAGE to analyze current temporary segment allocation and usage patterns (see the example query below).
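
A minimal sketch of that check, joining V$TEMPSEG_USAGE to V$SESSION, is shown below; the 8 KB block size used for the MB conversion is an assumption, so adjust it to your temporary tablespace block size:

SELECT s.sid,
       s.serial#,
       s.username,
       s.sql_id,
       u.tablespace,
       u.segtype,
       ROUND(u.blocks * 8192 / 1024 / 1024) AS temp_mb   -- assumes 8 KB blocks
FROM   v$tempseg_usage u,
       v$session s
WHERE  u.session_addr = s.saddr
ORDER BY u.blocks DESC;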

Key Views to Analyze

  • dba_hist_active_sess_history: Historical session data
  • V$SESSION: Real-time session information
  • V$TEMPSEG_USAGE: Temporary segment usage details and patterns

Common Causes of ORA-1652

  • Insufficient temporary tablespace
  • Large sorting or joining operations
  • Inefficient SQL queries
  • Incorrect indexing

Best Practices

  • Regularly monitor temporary segment usage
  • Optimize SQL queries to reduce temporary space allocation
  • Ensure sufficient temporary tablespace allocation
  • Consider partitioning large tables

By using these queries and views, you'll quickly identify the causes of ORA-1652 errors and take corrective action to optimize your database performance.  

Categories: DBA Blogs

Estimating Query Execution Time in Oracle

Sun, 2024-10-27 00:33

 As an Oracle database administrator or developer, running heavy, long-running, and critical production queries can be nerve-wracking, especially in cloud environments. One crucial aspect is estimating the query execution time to plan and manage resources effectively. In this post, we'll explore a valuable query that provides an approximate ETA (Estimated Time of Arrival) for parallel queries on large datasets.

Knowing the ETA helps:

  • Plan resource allocation and utilization
  • Manage expectations and prioritize tasks
  • Identify potential performance bottlenecks
  • Optimize queries for better performance


ETA Query:

-- Set column widths for better readability
col sid for 999999
col QC_SID for 999999
col QC_INST for 9
col username for a10
col operation_name for a20
col target for a20
col units for a10
col start_time for a18

-- Main query
SELECT
  px.sid,
  CASE
    WHEN px.qcinst_id IS NULL THEN username
    ELSE username || ' - ' || LOWER(SUBSTR(pp.SERVER_NAME, LENGTH(pp.SERVER_NAME) - 4, 4))
  END AS "Username",
 
  SUBSTR(opname, 1, 30) AS operation_name,
  SUBSTR(target, 1, 30) AS target,
  sofar,
  totalwork,
  ROUND(sofar / totalwork * 100) AS pct_done,
  units,
  start_time,
  ROUND(totalwork / (sofar / ((SYSDATE - start_time) * 1440))) AS eta_min,
  CASE
    WHEN px.qcinst_id IS NULL THEN s.sid
    ELSE px.qcsid
  END AS QC_SID,
  px.qcinst_id AS QC_INST
FROM
  gv$px_session px,
  gv$px_process pp,
  gv$session_longops s
WHERE
  px.sid = s.sid
  AND px.serial# = s.serial#
  AND px.inst_id = s.inst_id
  AND px.sid = pp.sid (+)
  AND px.serial# = pp.serial# (+)
  AND sofar <> totalwork
ORDER BY
  CASE
    WHEN px.QCINST_ID IS NULL THEN px.INST_ID
    ELSE px.QCINST_ID
  END,
  px.QCSID,
  CASE
    WHEN px.SERVER_GROUP IS NULL THEN 0
    ELSE px.SERVER_GROUP
  END,
  px.SERVER_SET,
  px.INST_ID;

This query joins three dynamic performance views:


  • gv$px_session (parallel execution sessions)
  • gv$px_process (parallel execution processes)
  • gv$session_longops (long-running operations)


It calculates:

  • eta_min: estimated time to completion in minutes
  • pct_done: percentage of work completed


To customize the query:

  • Filter by specific username or operation name
  • Add additional columns for more detailed information
  • Use gv$session instead of gv$px_session for non-parallel queries (a simpler sketch based on gv$session_longops follows this list)
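
For non-parallel statements, a simpler sketch that relies only on gv$session_longops and its built-in time_remaining column can serve as a starting point:

SELECT inst_id,
       sid,
       serial#,
       opname,
       target,
       sofar,
       totalwork,
       ROUND(sofar / totalwork * 100) AS pct_done,
       ROUND(time_remaining / 60)     AS eta_min
FROM   gv$session_longops
WHERE  sofar <> totalwork
ORDER BY eta_min DESC;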


By using this query, you'll gain valuable insights into your critical production queries and make informed decisions about resource allocation and optimization.

Categories: DBA Blogs

Oracle Database Connections with Oracle Functions

Sun, 2024-10-20 00:29

 As a developer, connecting to databases can be a hassle. But what if you could leverage a serverless platform to streamline your connections? Enter Oracle Functions, a fully managed, scalable, and on-demand functions-as-a-service platform built on Oracle Cloud Infrastructure (OCI). In this post, we'll explore how to connect to an Oracle database using Oracle Functions and Python.

With Oracle Functions, you can focus on writing code, not managing infrastructure. Its serverless architecture eliminates administrative tasks, allowing you to:


  • Scale effortlessly
  • Pay only for executed functions
  • Enjoy enterprise-grade security


To connect to your Oracle database, you'll need:


  • cx_Oracle library (install using pip install cx_Oracle)
  • Oracle database credentials (username, password, host, and service name)


Here's an example Python function to get you started:

import cx_Oracle


def sql_connect(username, password, host, service_name, query):
    # Build the DSN; service_name must be passed as a keyword (the third
    # positional argument of makedsn is the SID, not the service name)
    dsn = cx_Oracle.makedsn(host, 1521, service_name=service_name)
    conn = cx_Oracle.connect(user=username, password=password, dsn=dsn)
    cursor = conn.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
    conn.close()
    return result


if __name__ == '__main__':
    host = "remoteserver"
    service_name = "test"
    username = "your_username"
    password = "your_password"
    query = "SELECT SYSDATE FROM DUAL"
    print(sql_connect(username, password, host, service_name, query))




When using Oracle Functions to connect to your database:


  • Ensure your Oracle database is accessible from your Oracle Functions environment.
  • Store sensitive credentials securely using Oracle Cloud Infrastructure's Vault service.
  • Optimize your queries for performance.


By using Oracle Functions and Python, you can simplify your Oracle database connections and focus on building scalable applications.

Categories: DBA Blogs

Train F5-TTS Voice Model on Custom Dataset for Free Locally - Step by Step Tutorial

Tue, 2024-10-15 20:30

This video is a step-by-step tutorial on fine-tuning or fully training the F5-TTS and E2-TTS voice models on your own custom voice dataset locally.





Code:
git clone https://github.com/SWivid/F5-TTS.git && cd F5-TTS

cd ckpts

mkdir F5TTS_Base

wget https://huggingface.co/SWivid/F5-TTS/resolve/main/F5TTS_Base/model_1200000.safetensors?download=true

-- In train.py, in Line 75, make sure that path points to your model's directory
-- In models/trainer.py , in Line 94, make sure that path points to your model's directory


conda create -n ai python=3.11 -y && conda activate ai

pip install torch torchaudio
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/accelerate
pip install huggingface_hub
pip install pandas datasets

import pandas as pd
from datasets import load_dataset

dataset = load_dataset("amphion/Emilia-Dataset")

dataset.save_to_disk("/home/Ubuntu/mydataset/emilia_subset")

# prepare custom dataset up to your need
# download corresponding dataset first, and fill in the path in scripts , you may tailor your own one along with a Dataset class in model/dataset.py.

# Prepare the Emilia dataset
python scripts/prepare_emilia.py

# Prepare the Wenetspeech4TTS dataset
python scripts/prepare_wenetspeech4tts.py

Training
Once your datasets are prepared, you can start the training process.

# setup accelerate config, e.g. use multi-gpu ddp, fp16
# will be to: ~/.cache/huggingface/accelerate/default_config.yaml    
accelerate config
accelerate launch train.py
Categories: DBA Blogs

F5-TTS Model Installation on Windows - Easy Step by Step Tutorial

Mon, 2024-10-14 19:57

This video shows how to locally install the F5-TTS and E2-TTS models on Windows easily with Pinokio. F5-TTS is "A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching".



Categories: DBA Blogs

Oracle Database Migration with Supplemental Logging

Thu, 2024-10-10 00:20

As an Oracle database administrator, I've come to appreciate the power of supplemental logging, especially when migrating databases or tables from one location to another. In this post, we'll explore what supplemental logging is, how it works, and provide practical examples to get you started.

According to Oracle documentation, supplemental logging records additional columns in redo log files, which is essential for redo-based applications. This process ensures that rows can be uniquely identified, making database migration and recovery more efficient.

When supplemental logging is enabled, redo logs contain extra columns from tables, including:

  • Primary key columns (if defined)
  • Unique index columns (if no primary key exists)
  • All columns (if no primary key or unique index exists)

The good news is that enabling supplemental logging typically has a negligible impact on your Oracle instance's performance.

To enable supplemental logging, use the following commands:

Scenario 1: Table with Primary Key

ALTER TABLE my_table ADD SUPPLEMENTAL LOG DATA (PRIMARY KEY) COLUMNS;


Scenario 2: Table with Unique Index (no Primary Key)

Identify the first unique index in alphabetical order and create a supplemental log group on its columns.
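
For example, if the chosen unique index covers the hypothetical columns order_no and line_no, the log group could be created like this (table, group, and column names are illustrative):

ALTER TABLE my_table
  ADD SUPPLEMENTAL LOG GROUP log_grp_my_table (order_no, line_no) ALWAYS;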


Scenario 3: Table with No Primary Key or Unique Index

ALTER TABLE my_table ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;


When migrating large tables, consider the following best practices:

  • Limit initial load to avoid overhead on the Oracle instance.
  • Migrate during non-business hours to minimize impact.
  • Break down large tables into smaller tasks.

By using supplemental logging, you'll streamline your Oracle database migration process and ensure a smoother transition.

Suppose you're migrating a table customers with a primary key customer_id. To enable supplemental logging, run:


ALTER TABLE customers ADD SUPPLEMENTAL LOG DATA (PRIMARY KEY) COLUMNS;


This simple command ensures that the customer_id column is included in the redo logs, making it easier to identify and recover rows during migration.


I hope this helps.

Categories: DBA Blogs

Oracle Cloud's free AI services

Mon, 2024-10-07 02:28

Whenever I need to play with an AI model for free, or for a fraction of the cost in the cloud, my first preference is Oracle Cloud's free tier for AI. You can take advantage of it too.

Oracle Cloud is offering a range of AI services for free, allowing developers and data scientists to experiment and innovate without incurring costs. These services provide a comprehensive platform for building, training, and deploying machine learning models, as well as performing advanced text, speech, and image analysis.


At the heart of Oracle Cloud's AI offerings is its Machine Learning service, which provides up to 4,700 free hours. This service enables users to build, train, deploy, and manage machine learning models collaboratively, with scalability and power. Additionally, users can leverage digital assistant capabilities for up to 51 hours, allowing them to interact with natural language conversations and automate tasks without managing multiple apps and websites.


Beyond machine learning, Oracle Cloud's AI services include OCI Language for sophisticated text analysis at scale, OCI Speech for automatic speech recognition, OCI Vision for deep-learning-based image analysis, and OCI Document Understanding for extracting text, tables, and data from documents via APIs and CLI tools. These services are available with tiered pricing based on use cases, but users can experiment with them for free within the specified limits.


To support AI and machine learning workloads, Oracle Cloud also provides free compute and storage resources. This includes two Always Free AMD-based compute VMs with 1/8 OCPU and 1 GB memory each, up to 4,500 hours of compute for OCI Kubernetes Engine, and up to 500 GB of storage for Docker-based OCI Container Registry.


With Oracle Cloud's free AI services, developers and data scientists can develop and deploy machine learning models, explore advanced AI capabilities, and build innovative applications without incurring costs. Whether you're a seasoned AI expert or just starting to explore the possibilities of artificial intelligence, Oracle Cloud's free AI services provide an ideal platform to get started. Sign up for an Oracle Cloud account today and start playing with AI.

Categories: DBA Blogs

Whisper Large Turbo in Free Google Colab for Transcription - Step-by-Step Tutorial

Wed, 2024-10-02 16:04

This video shows how to install and use Whisper large-v3-turbo, a fine-tuned version of a pruned Whisper large-v3, for transcription with Gradio in free Google Colab.



Code:

!pip install git+https://github.com/huggingface/transformers gradio

import torch
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition",
               "openai/whisper-large-v3-turbo",
               torch_dtype=torch.float16,
               device="cuda:0")

pipe("/content/samples_jfk.wav")

import gradio as gr

def transcribe(inputs):
    if inputs is None:
        raise gr.Error("No audio file")

    text = pipe(inputs, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    return text

demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
    ],
    outputs="text",
    title="Whisper Large V3 Turbo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs. Thanks to HuggingFace"
    ),
    allow_flagging="never",
)

demo.launch()
Categories: DBA Blogs

Graph RAG in Oracle RDBMS with SQL

Mon, 2024-09-30 22:24

 Oracle Database offers a comprehensive implementation of Resource Description Framework (RDF), seamlessly integrating semantic data management with robust enterprise capabilities. This converged database combines the benefits of RDF with Oracle's renowned features:

  • Transactional support for data integrity
  • High-performance querying and data processing
  • Advanced security measures for data protection
  • Reliability and scalability for mission-critical applications

What sets Oracle apart is its tight integration of RDF with SQL. This unique convergence enables:

  • RDF databases to be accessed and queried like traditional SQL databases
  • Seamless interoperability between semantic and relational data models

Retrieval-Augmented Generation (RAG) is a paradigm-shifting approach in natural language processing (NLP) and artificial intelligence (AI) that combines the strengths of retrieval and generation models. Traditional language models rely solely on generating text based on learned patterns, often lacking specific knowledge or context. RAG addresses this limitation by integrating external knowledge sources into the generation process. This hybrid approach involves two primary components: a retriever and a generator. The retriever fetches relevant information from vast knowledge bases or databases, while the generator uses this retrieved information to create contextually accurate and informative text.

RAG enables AI models to access and leverage vast amounts of external knowledge, making them more informative, accurate, and contextually relevant. This technology has far-reaching implications for various applications, including question answering, text summarization, chatbots, and language translation. By bridging the gap between knowledge retrieval and text generation, RAG significantly enhances the capabilities of AI systems, allowing them to provide more precise and informative responses.

Knowledge Graphs (KGs) are structured representations of knowledge that organize and store information in the form of entities, attributes, and relationships. Inspired by semantic networks and graph theory, KGs provide a robust framework for AI systems to reason, infer, and retrieve knowledge. A Knowledge Graph typically consists of nodes (entities) interconnected by edges (relationships), which can represent various types of associations, such as hierarchical, causal, or semantic relationships.

Knowledge Graphs serve as a foundation for various AI applications, including question answering, recommendation systems, and natural language processing.

Example:

Assume we have two tables: Author and Publication.

Using SQL/PGQ, define the explicit relationships as follows:

CREATE PROPERTY GRAPH research_network_pg
VERTEX TABLES (
  author
    KEY (id)
    LABEL author
    PROPERTIES ALL COLUMNS,
  publication
    KEY (id)
    LABEL publication
    PROPERTIES ALL COLUMNS
)
EDGE TABLES (
  author as writes_publication
    KEY (id)
    SOURCE KEY (id) REFERENCES author (id)
    DESTINATION KEY (written_by) REFERENCES publication (id)
    LABEL writes
    PROPERTIES ALL COLUMNS
);


Querying the Graph:


Retrieve authors and their publication titles:


SELECT 'Author' AS label, t.*
FROM   GRAPH_TABLE (research_network_pg
  MATCH
  (a IS author) -[c IS writes]-> (p IS publication)
  COLUMNS (a.name AS author_name, p.title AS publication_title)
) t
ORDER BY 1;


In this example:

  • Authors and publications are nodes (vertexes).
  • The "writes" relationship connects authors to their publications (edges).
  • The query retrieves authors and their corresponding publication titles. 
Hope this helps.

Categories: DBA Blogs

How-To Implement Agentic RAG with Llama 3.2 on Your Own Dataset Locally

Sat, 2024-09-28 01:44

This video is a step-by-step tutorial on implementing Agentic RAG with the Llama 3.2 3B model on your own dataset locally.



Code:

conda create -n ag python=3.11 -y && conda activate ag

pip install torch
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/accelerate
pip install huggingface_hub
pip install sentencepiece
pip install bitsandbytes

pip install haystack-ai duckduckgo-api-haystack transformers sentence-transformers datasets

from datasets import load_dataset
from haystack import Document

from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

document_store = InMemoryDocumentStore()

dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()

docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])


import torch
from haystack.components.generators import HuggingFaceLocalGenerator

generator = HuggingFaceLocalGenerator(
    model="meta-llama/Llama-3.2-3B-Instruct",
    huggingface_pipeline_kwargs={"device_map":"auto",
                                 "torch_dtype":torch.bfloat16},
    generation_kwargs={"max_new_tokens": 256})

generator.warm_up()

prompt = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
  What is the capital of Australia?<|eot_id|>
  <|start_header_id|>assistant<|end_header_id|>"""

generator.run(prompt)

from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
retriever = InMemoryEmbeddingRetriever(document_store, top_k=5)

from haystack.components.builders import PromptBuilder

prompt_template = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Answer the following query given the documents.
If the answer is not contained within the documents reply with 'no_answer'.
If the answer is contained within the documents, start the answer with "FROM THE KNOWLEDGE BASE: ".

Documents:
{% for document in documents %}
  {{document.content}}
{% endfor %}

Query: {{query}}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

prompt_builder = PromptBuilder(template=prompt_template)

from haystack.components.routers import ConditionalRouter

routes = [
    {
        "condition": "{{'no_answer' in replies[0]}}",
        "output": "{{query}}",
        "output_name": "go_to_websearch",
        "output_type": str,
    },
    {
        "condition": "{{'no_answer' not in replies[0]}}",
        "output": "{{replies[0]}}",
        "output_name": "answer",
        "output_type": str,
    },
]

router = ConditionalRouter(routes)

from duckduckgo_api_haystack import DuckduckgoApiWebSearch

websearch = DuckduckgoApiWebSearch(top_k=5)

prompt_template_after_websearch = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Answer the following query given the documents retrieved from the web.
Start the answer with "FROM THE WEB: ".

Documents:
{% for document in documents %}
  {{document.content}}
{% endfor %}

Query: {{query}}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

prompt_builder_after_websearch = PromptBuilder(template=prompt_template_after_websearch)

from haystack.components.joiners import BranchJoiner
prompt_joiner  = BranchJoiner(str)

from haystack import Pipeline

pipe = Pipeline()
pipe.add_component("text_embedder", text_embedder)
pipe.add_component("retriever", retriever)
pipe.add_component("prompt_builder", prompt_builder)
pipe.add_component("prompt_joiner", prompt_joiner)
pipe.add_component("llm", generator)
pipe.add_component("router", router)
pipe.add_component("websearch", websearch)
pipe.add_component("prompt_builder_after_websearch", prompt_builder_after_websearch)

pipe.connect("text_embedder", "retriever")
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder", "prompt_joiner")
pipe.connect("prompt_joiner", "llm")
pipe.connect("llm.replies", "router.replies")
pipe.connect("router.go_to_websearch", "websearch.query")
pipe.connect("router.go_to_websearch", "prompt_builder_after_websearch.query")
pipe.connect("websearch.documents", "prompt_builder_after_websearch.documents")
pipe.connect("prompt_builder_after_websearch", "prompt_joiner")

def get_answer(query):
  result = pipe.run({"text_embedder": {"text": query}, "prompt_builder": {"query": query}, "router": {"query": query}})
  print(result["router"]["answer"])


query = "Why did people build Great Pyramid of Giza?"

get_answer(query)
Categories: DBA Blogs
